This file explains all data processing and analysis steps for [Project Title]. The R project has a private library in order to make all steps computationally reproducible. I use the renv package for this. That means you will need to install the package and run `renv::restore()`; see the renv package documentation for instructions. This works best if you don't have the library and staging folder within the renv folder.
# pacman makes it easier to load and install packages.
# quietly = TRUE suppresses the startup message when the check succeeds;
# braces around the if body follow the tidyverse style guide.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
library(pacman)

# load (and, if missing, install) all packages used in this script
p_load(
  knitr,
  MASS,
  Matrix,
  mgcv,
  Rmisc,
  here,
  DT,
  cowplot,
  pastecs,
  lme4,
  afex,
  lsmeans,
  MuMIn,
  tidyverse
)

# set seed so anything stochastic (e.g., jittered points in plots) reproduces
set.seed(42)

# set global theme for ggplot
theme_set(theme_cowplot())
Below the custom functions I’ll be using throughout the script.
### FUNCTION 1 ###
# function that transforms feet and inches to cm
# Convert a height reported in feet and inches to centimetres,
# rounded to the nearest whole centimetre.
#
# feet   - number of whole feet (1 ft = 30.48 cm)
# inches - remaining inches (1 in = 2.54 cm)
# Returns the height in cm as a (rounded) numeric value.
feet_inches_to_cm <- function(feet, inches) {
  total_cm <- feet * 30.48 + inches * 2.54
  round(total_cm, digits = 0)
}
### FUNCTION 2 ###
# function that transforms stones and pounds to kg
# Convert a weight reported in stones and pounds (or pounds only) to
# kilograms, rounded to the nearest whole kilogram.
#
# pounds - pounds component (1 lb = 0.453592 kg)
# stones - stones component (1 st = 6.35029 kg); NULL for respondents who
#          reported pounds only.
#
# NOTE: the original tested missing(stones) although the default is NULL, so
# an explicit stones = NULL silently returned numeric(0). is.null() covers
# both the "not supplied" and the "explicitly NULL" case.
stones_pounds_to_kg <- function(pounds, stones = NULL) {
  pounds_to_kg <- pounds * 0.453592
  if (is.null(stones)) {
    kg <- pounds_to_kg
  } else {
    kg <- stones * 6.35029 + pounds_to_kg
  }
  round(kg, digits = 0)
}
### FUNCTION 3 ###
# function that gives summary statistics and a densityplot, with different levels of repeated vs. trait-like measures
# Summary statistics (pastecs::stat.desc) and a density plot for one variable.
#
# df               - data frame in long format with a participant column `pp`
# variable         - bare column name of the variable to describe
# repeated_measure - FALSE for trait-like variables (one row per participant
#                    is kept), TRUE for repeated measures (all rows kept)
# want_summary     - if TRUE, return list(kable(stats), plot); else the plot
describe_visualize <-
  function(df,
           variable,
           repeated_measure = FALSE,
           want_summary = FALSE) {
    variable <- enquo(variable)
    # trait-like variables: reduce to a single row per participant
    if (!repeated_measure) {
      df <-
        df %>%
        group_by(pp) %>%
        slice(1) %>%
        ungroup()
    }
    # descriptive statistics
    sum_stats <-
      df %>%
      pull(!!variable) %>%
      stat.desc()
    # density plot with the raw observations marked along the x-axis
    density_plot <-
      ggplot(df, aes(x = !!variable)) +
      geom_density(color = "darkgrey", fill = "darkgrey") +
      geom_point(aes(x = !!variable, y = 0))
    # return both (if wanted) or just the plot
    if (want_summary) {
      list(kable(sum_stats), density_plot)
    } else {
      density_plot
    }
  }
### FUNCTION 4 ###
# function that returns a table for trait-like categorical variables
# Frequency table for a trait-like categorical variable, counting each
# participant (pp) exactly once.
my_table <- function(df, variable) {
  variable <- enquo(variable)
  one_row_per_pp <-
    df %>%
    group_by(pp) %>%
    slice(1)
  one_row_per_pp %>%
    pull(!!variable) %>%
    table()
}
### FUNCTION 5 ###
# raincloud plot function from https://github.com/RainCloudPlots/RainCloudPlots/blob/master/tutorial_R/R_rainclouds.R
# Defining the geom_flat_violin function ----
# Note: the below code modifies the
# existing github page by removing a parenthesis in line 50
# Null-coalescing helper: return `a` unless it is NULL, otherwise `b`.
# (Same contract as rlang's — and base R >= 4.4's — `%||%`.)
"%||%" <- function(a, b) {
  if (is.null(a)) {
    b
  } else {
    a
  }
}
# One-sided ("flat") violin geom, used to build raincloud plots.
# Thin wrapper around ggplot2::layer() that dispatches to GeomFlatViolin;
# extra arguments in ... are forwarded as geom parameters.
geom_flat_violin <- function(mapping = NULL, data = NULL, stat = "ydensity",
                             position = "dodge", trim = TRUE, scale = "area",
                             show.legend = NA, inherit.aes = TRUE, ...) {
  geom_params <- list(
    trim = trim,
    scale = scale,
    ...
  )
  layer(
    data = data,
    mapping = mapping,
    stat = stat,
    geom = GeomFlatViolin,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = geom_params
  )
}
#' @rdname ggplot2-ggproto
#' @format NULL
#' @usage NULL
#' @export
# ggproto object implementing the one-sided violin drawn by geom_flat_violin().
# Adapted from the RainCloudPlots repository referenced above.
GeomFlatViolin <-
ggproto("GeomFlatViolin", Geom,
# setup_data: compute each violin's bounding box before drawing
setup_data = function(data, params) {
# width: explicit aesthetic, else the geom param, else 90% of the smallest
# distance between distinct x values (via %||% defined above)
data$width <- data$width %||%
params$width %||% (resolution(data$x, FALSE) * 0.9)
# ymin, ymax, xmin, and xmax define the bounding rectangle for each group;
# xmax extends only to the right of x, which makes the violin one-sided
data %>%
group_by(group) %>%
mutate(
ymin = min(y),
ymax = max(y),
xmin = x,
xmax = x + width / 2
)
},
draw_group = function(data, panel_scales, coord) {
# Find the points for the line to go all the way around:
# xminv is the flat left edge, xmaxv the density outline on the right
data <- transform(data,
xminv = x,
xmaxv = x + violinwidth * (xmax - x)
)
# Make sure it's sorted properly to draw the outline
newdata <- rbind(
plyr::arrange(transform(data, x = xminv), y),
plyr::arrange(transform(data, x = xmaxv), -y)
)
# Close the polygon: set first and last point the same
# Needed for coord_polar and such
newdata <- rbind(newdata, newdata[1, ])
# NOTE(review): ggplot2:::ggname is an unexported internal; confirm it still
# exists in the ggplot2 version locked via renv before upgrading.
ggplot2:::ggname("geom_flat_violin", GeomPolygon$draw_panel(newdata, panel_scales, coord))
},
draw_key = draw_key_polygon,
default_aes = aes(
weight = 1, colour = "grey20", fill = "white", size = 0.5,
alpha = NA, linetype = "solid"
),
required_aes = c("x", "y")
)
### FUNCTION 6 ###
# creates raincloud plots
# Raincloud plot (flat violin + jittered raw points) for one measure.
#
# df          - long-format data with columns `measure` and the columns named
#               by `variable` and `group`
# measurement - which level of `measure` to plot (e.g. "attractiveness")
# variable    - y variable, passed as a string (hence aes_string)
# group       - grouping factor, passed as a string
# xlab, title - axis label and plot title
# facet       - optional faceting variable (string); NULL for no facets
rc_plot <-
  function(df,
           measurement,
           variable,
           group,
           xlab,
           title,
           facet = NULL) {
    # restrict the data to the requested measure
    plot_data <-
      df %>%
      filter(measure == measurement)
    out <-
      ggplot(
        plot_data,
        aes_string(
          x = group,
          y = variable,
          fill = group,
          color = group
        )
      ) +
      geom_flat_violin(
        position = position_nudge(x = .2, y = 0),
        adjust = 2
      ) +
      geom_point(
        position = position_jitter(width = .15),
        size = .10
      ) +
      ylab("Rating (0-100)") +
      xlab(xlab) +
      coord_flip() +
      guides(fill = FALSE, color = FALSE) +
      scale_color_brewer(palette = "Dark2") +
      scale_fill_brewer(palette = "Dark2") +
      ggtitle(title)
    # optionally facet, e.g. by food type
    if (!is.null(facet)) {
      out <- out + facet_wrap(facet)
    }
    out
  }
### FUNCTION 6 ###
# creates line plots
# Line plot of aggregated means with error bars.
# df is expected to be a summary data frame (e.g., from Rmisc::summarySEwithin)
# that contains an `se` column; x_group and group_by are passed as strings,
# measure as a bare column name.
line_plot <-
function(
df,
x_group,
measure,
group_by
) {
# measure is captured as a quosure and used both inside aes_string() (as the
# y mapping) and unquoted (!!) inside aes() for the error-bar limits.
# NOTE(review): passing a quosure into aes_string() mixes two quoting
# systems; it appears to work with the ggplot2 version locked here, but
# confirm when upgrading (aes_string() is soft-deprecated in ggplot2 >= 3.4).
measure <- enquo(measure)
ggplot(df,
aes_string(
x = x_group,
y = measure,
group = group_by,
color = group_by,
linetype = group_by
)
) +
geom_line() +
geom_point() +
# error bars: mean +/- the se column of df
geom_errorbar(
aes(
ymin = !! measure - se,
ymax = !! measure + se),
width = .05) +
scale_color_brewer(palette = "Dark2")
}
### FUNCTION 7 ###
# function that calculates proportion of residuals above a cut-off for an lmer object
# Proportion of a model's scaled residuals whose absolute value EXCEEDS
# cut_off (rough fit/outlier diagnostic for lmer models).
# (The original inline comment said "below cut_off", but the code — kept
# identical in behavior here — counts residuals above it.)
proportion_residuals <- function(model, cut_off) {
  abs_scaled_resid <- abs(resid(model, scaled = TRUE))
  # mean of a logical vector = proportion of TRUEs
  mean(abs_scaled_resid > cut_off)
}
### FUNCTION 8 ###
# shows the proportions from the above function in a table
# Tabulate the proportion of scaled residuals exceeding +/- 2, 2.5, and 3,
# alongside the reference proportions used as cut-offs, plus a flag for
# whether the observed proportion stays below the cut-off. Prints (and
# thereby returns) the resulting tibble.
lmer_residuals <- function(lmer_model) {
  cutoffs <- c(2, 2.5, 3)
  reference_proportions <- c(.05, .01, .0001)
  # observed proportion for each cut-off, via FUNCTION 7 above
  observed_proportions <- vapply(
    cutoffs,
    function(co) proportion_residuals(lmer_model, co),
    numeric(1)
  )
  resid_proportions <-
    tibble(
      standardized_cutoff = cutoffs,
      proportion_cutoff = reference_proportions,
      proportion = observed_proportions,
      below_cutoff = proportion < proportion_cutoff
    )
  print(resid_proportions)
}
### FUNCTION 9 ###
# function takes the formal outlier values on Cook's distance from an lmer object and arranges them from highest to lowest
# Arrange Cook's distance values from highest to lowest and show the top
# entries (head()).
# NOTE(review): `outliers` is presumably an influence-style object from an
# lmer fit whose cooks.distance() method yields a single column named V1
# after as_tibble() — confirm against the object actually passed in.
# NOTE(review): as_tibble()'s `rownames` argument expects the NAME of the
# column that should hold the rownames; as.character(grouping_factor)
# therefore assumes grouping_factor is a single string such as "pp" —
# verify at the call sites.
arrange_cook <-
function(outliers, grouping_factor){
cooks.distance(outliers) %>%
as_tibble(rownames = as.character(grouping_factor)) %>%
rename(cooks_distance = V1) %>%
arrange(desc(cooks_distance)) %>%
head()
}
First, we load the data. For a codebook, see the [enter file name] file. Note that the Qualtrics output format (three header rows) is quite the nightmare for importing; the two-step read below works around it.
# Qualtrics exports three header rows; the first holds the variable names,
# so read that row on its own and keep the names in a character vector.
variable_names <-
  read_csv(
    here("data", "study2", "raw_data.csv"),
    col_names = FALSE,
    n_max = 1
  ) %>%
  as.character()
# read the actual data: name the columns with the vector from above and
# skip the three rows of Qualtrics variable info
raw_data <- read_csv(
  here("data", "study2", "raw_data.csv"),
  col_names = variable_names,
  na = "",
  skip = 3
)
# the names vector is no longer needed
rm(variable_names)
The data frame is extremely wide; we have 854 variables. This has several reasons:
We need to get these data into the long format. Before that, let’s only keep those variables we need and simultaneously give them proper names. Also, the response_id already is a unique identifier, but not easy to read. I’ll add a participant number (pp). Last, because I’m cleaning the raw data, I’ll use a new data object: working_file
# Keep only the variables needed for the analysis, renaming the Qualtrics
# question codes (Q...) to readable names in the same select() call; then
# add a readable participant number (pp) and move it to the first column.
# NOTE(review): the selection mixes named inclusions with a trailing
# -contains("Click"); dplyr removes the click-count columns from the set
# selected by the ranges above — confirm the resulting column set is as
# intended for this dplyr version.
working_file <-
raw_data %>%
select(
start_date = StartDate,
end_date = EndDate,
duration = `Duration (in seconds)`,
finished = Finished,
response_id = ResponseId,
fullfil_criteria = Q3,
consent = Q8,
prolific_id = Q9,
group = Group,
hungry = Q11_1,
thirsty = Q11_2,
desire_instructions_time = `Q13_Page Submit`,
pb_1_c_attr_1:`mb_20_enh_attr_time_Click Count`, # all attractiveness variables
simulations_instructions_time = `Q41_Page Submit`,
pb_1_c_sim_1:`mb_20_enh_sim_time_Click Count`, # all simulation variables
age = Q19,
gender = Q20,
height_choice = Q21,
height_cm = Q22_1,
height_feet = Q23_1,
height_inches = Q23_2,
weight_choice = Q24,
weight_kilos = Q25_1,
weight_stones = Q26_1,
weight_pounds = Q26_2,
weight_pounds_only = Q27_1,
diet = Q28,
diet_details = Q29,
meat_per_week = Q30,
change_diet = Q31_1,
attitude_meat = attitude_meat_1,
attitude_vegan = attitude_vegan_1,
attitude_pb = attitude_pb_1,
allergies = Q32,
allergies_details = Q33,
language_issues = Q34,
language_issues_details = Q35,
didnt_like = Q36,
study_about = Q37,
technical_issues = Q38,
-contains("Click"), # omit click count variables
) %>%
mutate(pp = paste0("pp_", row_number())) %>%
select(# add participant number and set it as first column
pp,
everything()
)
Next, I assign the correct variable type. In addition, I exported the Qualtrics data with numbers as output, so I will reintroduce the factor levels for factor variables.
# Assign variable types: convert identifier and categorical variables to
# factors, then map the numeric Qualtrics export codes back to readable
# labels (fct_recode syntax: new_label = "old_level").
# NOTE(review): mutate_at()/vars() is superseded by across() in current
# dplyr; kept as is to match the package versions locked via renv.
working_file <-
working_file %>%
mutate_at(
vars(
pp,
finished,
response_id:group,
gender,
height_choice,
weight_choice,
diet,
allergies,
language_issues
),
list(~ as.factor(.))
) %>%
mutate(
finished = fct_recode(
finished,
no = "0",
yes = "1"
),
gender = fct_recode(
gender,
male = "1",
female = "2",
other = "3"
),
diet = fct_recode(
diet,
omnivore = "1",
pescatarian = "2",
vegetarian = "3",
vegan = "4",
other = "5"
),
height_choice = fct_recode(
height_choice,
cm = "1",
feet_inches = "2"
),
weight_choice = fct_recode(
weight_choice,
kg = "1",
stones_pounds = "2",
pounds_only = "3"
)
) %>%
# these four variables share the same yes/no coding, so recode them together
mutate_at(
vars(
fullfil_criteria,
consent,
allergies,
language_issues
),
list(
~ fct_recode(
.,
yes = "1",
no = "2"
)
)
)
Upon inspection, I saw that meat eating frequency (meat_per_week) has two non-numerical entries: 1/2 and less than once. I don’t want to remove the latter, so I’ll convert both to a number (0.5).
# meat_per_week has two non-numeric entries ("1/2" and "less than once");
# code both as 0.5 and convert everything else to a number.
working_file <-
  working_file %>%
  mutate(
    meat_per_week = if_else(
      meat_per_week %in% c("1/2", "less than once"),
      0.5,
      as.numeric(meat_per_week)
    )
  )
Similarly, there are text entries in weight_kilos and weight_pounds. In weight_kilos there’s a text entry explaining that the respondent doesn’t know their weight, so we’ll set that to NA. In weight_pounds, the respondent indicated stones, but "N/A" for pounds, so we’ll set that to 0, assuming the respondent weighed around the indicated weight in stones. We’ll do the same for all those who only provided stones, but no pounds.
# Clean text entries in the weight variables:
# - one respondent explained (with typos, kept verbatim below) that they do
#   not know their weight -> set to NA
# - "N/A" in weight_pounds -> NA first, then 0 for everyone who reported
#   stones but no pounds, so stones alone determine their weight
working_file <-
working_file %>%
mutate(
weight_kilos = as.numeric(
na_if(weight_kilos, "I don't lnow unfortuantely and I do not have acces to a set of scales qhilw filling out this survey. I applogise for this, but I was not informed I would need this prior to starting the survey.")
),
weight_pounds = as.numeric(
na_if(weight_pounds, "N/A")
),
# missing pounds with stones reported: treat as 0 additional pounds
weight_pounds = case_when(
weight_choice == "stones_pounds" & is.na(weight_pounds) ~ 0,
TRUE ~ weight_pounds
)
)
Because the study was conducted in the UK, we also need to transform height and weight to cm and kg, respectively.
# Harmonize units: convert imperial height/weight entries to cm and kg with
# the helper functions defined above; metric entries are carried over as is.
# (The height_choice / weight_choice levels are mutually exclusive, so the
# order of the case_when branches does not affect the result.)
working_file <-
  working_file %>%
  mutate(
    height_cm = case_when(
      height_choice == "feet_inches" ~ feet_inches_to_cm(height_feet, height_inches),
      height_choice == "cm" ~ height_cm
    ),
    weight_kilos = case_when(
      weight_choice == "stones_pounds" ~ stones_pounds_to_kg(weight_pounds, weight_stones),
      weight_choice == "pounds_only" ~ stones_pounds_to_kg(weight_pounds_only),
      weight_choice == "kg" ~ weight_kilos
    )
  )
Now that the data are cleaned, I can finally transform them to the long format. Currently, the measurements of attractiveness and simulations are in the following format: pb_1_c_attr_1. The components (separated by underscores) of that format mean:
Let's remove the `_1` at the end of those variable names, and also remove all "Page Submit" appendices from their variable names.
# Strip the trailing "_1" that Qualtrics appends to single-item questions,
# and drop the "_Page Submit" suffix from the page-timer variables.
# (str_remove(x, p) is str_replace(x, p, ""); "_1$" removes exactly the
# two trailing characters the original trimmed with str_sub.)
working_file <-
  working_file %>%
  rename_at(
    vars(ends_with("_1")),
    list(~ str_remove(., "_1$"))
  ) %>%
  rename_at(
    vars(ends_with("Page Submit")),
    list(~ str_remove(., "_Page Submit"))
  )
Let's get to turning the data into the long format. Because we used two sets of stimuli (counterbalancing which stimulus belongs to which food type and label type), half of the participants gave responses to half of the measurement variables; the other half of the participants gave responses to the other half.
Therefore, participants will have missing values on those variables that belonged to the other set (i.e., the other counterbalancing condition). Luckily, the pivot_longer function is amazing, so we can drop those measurements per participants with the values_drop_na argument. This command has the nice side effect of also excluding those who did not consent to participate (because they have NAs everywhere.)
# Reshape to long format. The wide column names encode, separated by "_":
# food type, stimulus number, label type, and measure (e.g. pb_1_c_attr).
# NOTE(review): values_to receives two names ("rating", "time") while
# names_to has no ".value" component; current tidyr expects a single string
# for values_to unless names_to contains ".value" — confirm this behaves as
# intended with the tidyr version locked via renv.
working_file <-
working_file %>%
pivot_longer(
cols = c(contains("pb_"), contains("mb_")),
names_to = c( # specify the variables to be formed from the current variable names
"food_type",
"stimulus_no",
"label_type",
"measure"
),
values_to = c( # the values, which will be measurement type (attractiveness vs. simulations) and the timer per variable
"rating",
"time"
),
names_sep = "_",
values_drop_na = TRUE # do not include empty entries due to counterbalancing as rows in the long format
) %>%
# give proper variable names, labels, and variable types
mutate(
stimulus_no = as.numeric(stimulus_no)
) %>%
mutate(
food_type = fct_recode(
as.factor(food_type),
meat_based = "mb",
plant_based = "pb",
),
label_type = fct_recode(
as.factor(label_type),
control = "c",
enhanced = "enh"
),
measure = fct_recode(
as.factor(measure),
attractiveness = "attr",
simulations = "sim"
)
) %>% # arbitrary, but I like to order the variables roughly in the order they were collected
select(
pp:simulations_instructions_time,
food_type:time,
everything()
)
Alright, the data are in a tidy format. Below, I show the data of a random participant for illustration.
There are still a couple of test runs left (three in total). They can be identified because they do not follow the Prolific ID scheme (i.e., starting with a 5, less than 20 characters). We exclude them here.
# before excluding testruns
length(unique(working_file$pp))
## [1] 178
# Exclude test runs: genuine Prolific IDs are longer than 20 characters
# (test runs do not follow the Prolific ID scheme), so keep only rows whose
# prolific_id exceeds that length. Rows with a missing prolific_id are
# dropped as well (str_length(NA) is NA, and filter drops NA conditions).
working_file <-
working_file %>%
filter(
str_length(
as.character(prolific_id)
) > 20
)
# after exclusion
length(unique(working_file$pp))
## [1] 175
Now exclude those who didn’t finish the survey.
length(unique(working_file$pp))
## [1] 175
working_file <-
working_file %>%
filter(finished == "yes")
length(unique(working_file$pp))
## [1] 174
Out of experience, Qualtrics isn’t very good at determining whether a survey is finished or not. We double check this: If indeed all participants gave all ratings, there should be 80 for each of the remaining 174 participants: 40 attractiveness ratings and 40 simulations ratings. Furthermore, there should be few NAs remaining on any of the ratings.
There are three cases where participants did not report a rating. I double checked that in the raw data file and that’s indeed the case. Respondents were not forced to give a response (Qualtrics option was “Request Response”), so participants simply did not respond to those three items.
# does each participant have 80 ratings (i.e., rows)?
working_file %>%
group_by(pp) %>%
count() %>%
ungroup() %>%
summarize(
all_there = sum(n == 80)
)
## # A tibble: 1 x 1
## all_there
## <int>
## 1 174
# compare to sample
length(unique(working_file$pp))
## [1] 174
# are there NAs left on the rating or timing variable?
working_file %>%
select(rating, time) %>%
summarize_all(
list(~ sum(is.na(.)))
)
## # A tibble: 1 x 2
## rating time
## <int> <int>
## 1 3 0
working_file %>%
filter(is.na(rating)) %>%
DT::datatable(
.,
options = list(
autoWidth = TRUE,
scrollY = TRUE,
scrollX = TRUE,
pageLength = 3
)
)
There was a filter question at the beginning of the study asking participants whether they fulfill the inclusion criteria. All participants indicated they fulfilled the criteria, so no need to exclude anyone.
working_file %>%
group_by(pp) %>%
slice(1) %>%
pull(fullfil_criteria) %>%
table()
## .
## yes no
## 174 0
In the preregistration, we specified data quality checks: if participants do not comply with the experimental instructions or give (almost) exactly the same response on each trial (i.e., within 1 of 100 scale points).
To identify possibly rushed responses, we inspect the variation of the ratings and compare it to how quickly participants responded.
The variation looks fine, nobody just clicked the same point of the scale repeatedly. One participant was extremely fast, spending about 1.5 seconds per rating, possibly hinting at randomly clicking through.
# lowest variation of the ratings
working_file %>%
group_by(pp) %>%
summarize(
mean = (mean(rating, na.rm = TRUE)),
sd = sd(rating, na.rm = TRUE)
) %>%
arrange(sd)
## # A tibble: 174 x 3
## pp mean sd
## <fct> <dbl> <dbl>
## 1 pp_139 42.1 10.1
## 2 pp_47 53.2 12.2
## 3 pp_137 58.1 14.3
## 4 pp_49 82.8 14.6
## 5 pp_141 53.6 14.8
## 6 pp_140 56.4 15.4
## 7 pp_160 59.3 15.8
## 8 pp_112 55.1 16.1
## 9 pp_125 62.5 16.2
## 10 pp_30 55.3 16.4
## # ... with 164 more rows
# check response times
time_means <-
working_file %>%
group_by(pp) %>%
summarize(
time_mean = mean(time)
) %>%
arrange(time_mean)
# inspect the fastest times
head(time_means, n = 10)
## # A tibble: 10 x 2
## pp time_mean
## <fct> <dbl>
## 1 pp_81 1.54
## 2 pp_69 2.40
## 3 pp_82 2.65
## 4 pp_54 2.92
## 5 pp_117 3.19
## 6 pp_64 3.23
## 7 pp_120 3.25
## 8 pp_63 3.38
## 9 pp_137 3.39
## 10 pp_85 3.42
# compare to median time
median(working_file$time)
## [1] 4.697
# plot them
time_means %>%
ggplot(aes(x = time_mean)) +
geom_density(color = "darkgrey", fill = "darkgrey") +
geom_point(aes(x = time_mean, y = 0))
Note: This is not preregistered. It’s pretty hard to tell what mean time on answering items indicates inattentiveness. However, we can at least set those mean times into relation with the sample median time. Leiner has put forward a useful way to identify meaningless data in internet studies, validated with real data.
The method is relatively straightforward: The Relative Speed Index (RSI) gives an impression of how fast participants responded to items without giving too much weight to quick or slow responses on individual stimuli. An RSI of > 1.75 is considered an indicator of meaningless data (actually, it might even be conservative).
The RSI is computed as follows:
# Compute Leiner's Relative Speed Index (RSI) for every participant:
# per-page median completion time, divided by each individual's page time,
# trimmed, and averaged within participant.
working_file <-
working_file %>%
# calculate median time for each stimulus (aka page)
group_by(
food_type,
stimulus_no,
label_type,
measure
) %>%
summarize(
page_time_median = median(time)
) %>%
# add those median page times to the data set and match with the stimuli there
# (note the argument order: the piped summary is the y / lookup side of the
# join, working_file the x side, so the result keeps one row per rating)
left_join(
working_file,
.,
by = c(
"food_type",
"stimulus_no",
"label_type",
"measure"
)
) %>%
# now divide the median page completion time by the individual page completion time (so per row)
mutate(
speed_factor = page_time_median / time
) %>%
# trim to [0|3]: dividing by zero will yield a speed factor of Inf, anything else a higher number
# NOTE(review): Inf (an individual page time of zero) is trimmed to 0 rather
# than 3 — confirm this matches the intended handling in Leiner's procedure.
mutate(
speed_factor_trimmed = case_when(
speed_factor == Inf ~ 0,
speed_factor > 3 ~ 3,
TRUE ~ speed_factor
)
) %>%
# average those trimmed speed factors to obtain RSI
# (mutate, not summarize, so every row carries its participant's RSI)
group_by(pp) %>%
mutate(
rsi = mean(speed_factor_trimmed)
) %>%
ungroup()
Let’s inspect the RSI. Indeed, there is at least one massive outlier, and several RSIs > 1.75. If we inspect mean completion time side by side with RSI, we see that several of those participants we suspected rushed through also come up here. We thus exclude those participants with an RSI > 1.75. Note: this decision was not preregistered, but we logged it in a decision log file.
I’ll run the analyses with and without these 8 cases, so I’ll put their data into a separate object.
# let's plot them
working_file %>%
group_by(pp) %>%
slice(1) %>%
ggplot(aes(x = rsi)) +
geom_density() +
geom_point(aes(x = rsi, y = 0))
# compare with raw mean time per page
working_file %>%
group_by(pp) %>%
slice(1) %>%
arrange(desc(rsi)) %>%
select(pp, rsi) %>%
left_join(.,
time_means,
by = "pp"
)
## # A tibble: 174 x 3
## # Groups: pp [174]
## pp rsi time_mean
## <fct> <dbl> <dbl>
## 1 pp_81 2.75 1.54
## 2 pp_116 2.18 4.02
## 3 pp_69 2.13 2.40
## 4 pp_82 2.12 2.65
## 5 pp_54 2.00 2.92
## 6 pp_118 1.97 5.24
## 7 pp_64 1.94 3.23
## 8 pp_125 1.77 3.71
## 9 pp_128 1.75 3.66
## 10 pp_137 1.74 3.39
## # ... with 164 more rows
# how many do we exclude?
length(unique((working_file$pp))) - nrow(working_file %>% filter(rsi <= 1.75) %>% group_by(pp) %>% slice(1))
## [1] 8
# Participants with RSI > 1.75 are excluded from the main analysis but kept
# in a separate object so the analyses can later be re-run including them
# (robustness check).
high_rsi <- filter(working_file, rsi > 1.75)
# keep only the remaining participants in the working file
working_file <- filter(working_file, rsi <= 1.75)
How long did participants spend on the survey? Median duration is around ~ 14 minutes.
describe_visualize(
working_file,
duration,
FALSE,
TRUE
)
## [[1]]
##
##
## x
## ------------- -------------
## nbr.val 1.660000e+02
## nbr.null 0.000000e+00
## nbr.na 0.000000e+00
## min 4.440000e+02
## max 2.513000e+03
## range 2.069000e+03
## sum 1.549810e+05
## median 8.460000e+02
## mean 9.336205e+02
## SE.mean 2.846142e+01
## CI.mean.0.95 5.619552e+01
## var 1.344687e+05
## std.dev 3.666997e+02
## coef.var 3.927717e-01
##
## [[2]]
How long did they spend on the instructions for the attractiveness and simulations, respectively? Overall not long (~ 10 seconds), but some apparently left the browser window open for quite some time.
describe_visualize(
working_file,
desire_instructions_time,
FALSE,
TRUE
)
## [[1]]
##
##
## x
## ------------- -------------
## nbr.val 166.0000000
## nbr.null 0.0000000
## nbr.na 0.0000000
## min 0.8990000
## max 53.7550000
## range 52.8560000
## sum 1854.5210000
## median 8.7835000
## mean 11.1718133
## SE.mean 0.6508350
## CI.mean.0.95 1.2850383
## var 70.3153031
## std.dev 8.3854221
## coef.var 0.7505874
##
## [[2]]
describe_visualize(
working_file,
simulations_instructions_time,
FALSE,
TRUE
)
## [[1]]
##
##
## x
## ------------- ------------
## nbr.val 166.000000
## nbr.null 0.000000
## nbr.na 0.000000
## min 1.064000
## max 368.953000
## range 367.889000
## sum 2943.293000
## median 11.281500
## mean 17.730681
## SE.mean 2.776208
## CI.mean.0.95 5.481473
## var 1279.417335
## std.dev 35.768944
## coef.var 2.017348
##
## [[2]]
Were there language issues? Barely, and the qualitative answers don’t give cause for concern.
my_table(working_file, language_issues)
## .
## yes no
## 2 164
working_file %>%
group_by(pp) %>%
slice(1) %>%
ungroup() %>%
filter(language_issues == "yes") %>%
pull(language_issues_details)
## [1] "I didn't know the meaning of a few words"
## [2] "a couple words I was unsure of, like ragu"
First, I look at hunger and thirst ratings. Participants seemed to be more thirsty than hungry.
describe_visualize(
working_file,
hungry,
FALSE,
TRUE
)
## [[1]]
##
##
## x
## ------------- -------------
## nbr.val 166.0000000
## nbr.null 29.0000000
## nbr.na 0.0000000
## min 0.0000000
## max 100.0000000
## range 100.0000000
## sum 4744.0000000
## median 23.0000000
## mean 28.5783133
## SE.mean 1.9521333
## CI.mean.0.95 3.8543811
## var 632.5968602
## std.dev 25.1514783
## coef.var 0.8800897
##
## [[2]]
describe_visualize(
working_file,
thirsty,
FALSE,
TRUE
)
## [[1]]
##
##
## x
## ------------- ------------
## nbr.val 166.000000
## nbr.null 10.000000
## nbr.na 0.000000
## min 0.000000
## max 100.000000
## range 100.000000
## sum 7564.000000
## median 51.000000
## mean 45.566265
## SE.mean 1.963437
## CI.mean.0.95 3.876699
## var 639.944067
## std.dev 25.297116
## coef.var 0.555172
##
## [[2]]
What’s the age range in our sample? One participant indicated to be 259 years old. Probably a typo, but not sure whether it’s supposed to be 25 or 59, so I set that value to NA.
describe_visualize(
working_file,
age,
FALSE,
TRUE
)
## [[1]]
##
##
## x
## ------------- ------------
## nbr.val 166.000000
## nbr.null 0.000000
## nbr.na 0.000000
## min 18.000000
## max 259.000000
## range 241.000000
## sum 5353.000000
## median 30.000000
## mean 32.246988
## SE.mean 1.581207
## CI.mean.0.95 3.122006
## var 415.035597
## std.dev 20.372422
## coef.var 0.631762
##
## [[2]]
# recode the impossible age of 259 (almost certainly a typo) as missing
working_file <-
  working_file %>%
  mutate(
    age = na_if(age, 259)
  )
describe_visualize(
working_file,
age,
FALSE,
TRUE
)
## [[1]]
##
##
## x
## ------------- -------------
## nbr.val 165.0000000
## nbr.null 0.0000000
## nbr.na 1.0000000
## min 18.0000000
## max 69.0000000
## range 51.0000000
## sum 5094.0000000
## median 30.0000000
## mean 30.8727273
## SE.mean 0.7868169
## CI.mean.0.95 1.5535972
## var 102.1483370
## std.dev 10.1068460
## coef.var 0.3273713
##
## [[2]]
What is the gender distribution in our sample?
my_table(working_file, gender)
## .
## male female other
## 48 117 1
What’s the height distribution? One participant appears to be giant. I think this is too unrealistic. We could think about whether that participant took the survey seriously, but the ratings and duration on instruction look good when inspecting the raw data. After setting that entry to missing, the rest looks pretty normal.
describe_visualize(
working_file,
height_cm,
FALSE,
TRUE
)
## [[1]]
##
##
## x
## ------------- -------------
## nbr.val 1.650000e+02
## nbr.null 0.000000e+00
## nbr.na 1.000000e+00
## min 1.500000e+02
## max 2.900000e+02
## range 1.400000e+02
## sum 2.802400e+04
## median 1.700000e+02
## mean 1.698424e+02
## SE.mean 9.919466e-01
## CI.mean.0.95 1.958633e+00
## var 1.623531e+02
## std.dev 1.274178e+01
## coef.var 7.502120e-02
##
## [[2]]
# recode the implausible height of 290 cm as missing
working_file <-
  working_file %>%
  mutate(
    height_cm = na_if(height_cm, 290)
  )
describe_visualize(
working_file,
height_cm,
FALSE,
TRUE
)
## [[1]]
##
##
## x
## ------------- -------------
## nbr.val 1.640000e+02
## nbr.null 0.000000e+00
## nbr.na 2.000000e+00
## min 1.500000e+02
## max 1.930000e+02
## range 4.300000e+01
## sum 2.773400e+04
## median 1.700000e+02
## mean 1.691098e+02
## SE.mean 6.727867e-01
## CI.mean.0.95 1.328501e+00
## var 7.423328e+01
## std.dev 8.615874e+00
## coef.var 5.094840e-02
##
## [[2]]
Let’s look at the weight. There’s a couple of participants who indicated to weigh less than 20 kilos. I find that hard to believe and set their weight as missing.
describe_visualize(
working_file,
weight_kilos,
FALSE,
TRUE
)
## [[1]]
##
##
## x
## ------------- -------------
## nbr.val 1.640000e+02
## nbr.null 0.000000e+00
## nbr.na 2.000000e+00
## min 3.000000e+00
## max 1.510000e+02
## range 1.480000e+02
## sum 1.188020e+04
## median 7.000000e+01
## mean 7.244024e+01
## SE.mean 1.352381e+00
## CI.mean.0.95 2.670445e+00
## var 2.999452e+02
## std.dev 1.731893e+01
## coef.var 2.390788e-01
##
## [[2]]
working_file %>%
filter(weight_kilos < 20) %>%
group_by(pp) %>%
slice(1) %>%
ungroup() %>%
select(pp, contains("weight"))
## # A tibble: 2 x 6
## pp weight_choice weight_kilos weight_stones weight_pounds
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 pp_61 pounds_only 3 NA NA
## 2 pp_90 pounds_only 18 NA NA
## # ... with 1 more variable: weight_pounds_only <dbl>
# weights below 20 kg are implausible; keep only values of at least 20 kg
# (missing weights simply stay missing)
working_file <-
  working_file %>%
  mutate(
    weight_kilos = if_else(weight_kilos >= 20, weight_kilos, NA_real_)
  )
describe_visualize(
working_file,
weight_kilos,
FALSE,
TRUE
)
## [[1]]
##
##
## x
## ------------- -------------
## nbr.val 1.620000e+02
## nbr.null 0.000000e+00
## nbr.na 4.000000e+00
## min 4.300000e+01
## max 1.510000e+02
## range 1.080000e+02
## sum 1.185920e+04
## median 7.000000e+01
## mean 7.320494e+01
## SE.mean 1.253943e+00
## CI.mean.0.95 2.476297e+00
## var 2.547246e+02
## std.dev 1.596009e+01
## coef.var 2.180194e-01
##
## [[2]]
Next, some information about their diet.
# diet
my_table(working_file, diet)
## .
## omnivore vegetarian other
## 163 2 1
# how often they eat meat per week
describe_visualize(
working_file,
meat_per_week,
FALSE,
TRUE
)
## [[1]]
##
##
## x
## ------------- -------------
## nbr.val 166.0000000
## nbr.null 2.0000000
## nbr.na 0.0000000
## min 0.0000000
## max 21.0000000
## range 21.0000000
## sum 1155.0000000
## median 7.0000000
## mean 6.9578313
## SE.mean 0.2889093
## CI.mean.0.95 0.5704357
## var 13.8557868
## std.dev 3.7223362
## coef.var 0.5349851
##
## [[2]]
# to what extent participants are currently trying to change their diets
describe_visualize(
working_file,
change_diet,
FALSE,
TRUE
)
## [[1]]
##
##
## x
## ------------- -------------
## nbr.val 166.0000000
## nbr.null 0.0000000
## nbr.na 0.0000000
## min 1.0000000
## max 100.0000000
## range 99.0000000
## sum 7449.8000000
## median 55.4500000
## mean 44.8783133
## SE.mean 2.6610126
## CI.mean.0.95 5.2540246
## var 1175.4439765
## std.dev 34.2847485
## coef.var 0.7639491
##
## [[2]]
# attitudes toward eating meat
describe_visualize(
working_file,
attitude_meat,
FALSE,
TRUE
)
## [[1]]
##
##
## x
## ------------- -------------
## nbr.val 1.660000e+02
## nbr.null 0.000000e+00
## nbr.na 0.000000e+00
## min 1.000000e+00
## max 1.000000e+02
## range 9.900000e+01
## sum 1.241726e+04
## median 8.140500e+01
## mean 7.480277e+01
## SE.mean 2.042682e+00
## CI.mean.0.95 4.033165e+00
## var 6.926433e+02
## std.dev 2.631812e+01
## coef.var 3.518335e-01
##
## [[2]]
# attitudes toward vegan food
describe_visualize(
working_file,
attitude_vegan,
FALSE,
TRUE
)
## [[1]]
##
##
## x
## ------------- -------------
## nbr.val 166.0000000
## nbr.null 0.0000000
## nbr.na 0.0000000
## min 1.0000000
## max 100.0000000
## range 99.0000000
## sum 9025.8000000
## median 56.5650000
## mean 54.3722892
## SE.mean 2.1154573
## CI.mean.0.95 4.1768554
## var 742.8764941
## std.dev 27.2557608
## coef.var 0.5012804
##
## [[2]]
# attitude toward plant-based food
describe_visualize(
working_file,
attitude_pb,
FALSE,
TRUE
)
## [[1]]
##
##
## x
## ------------- -------------
## nbr.val 166.0000000
## nbr.null 0.0000000
## nbr.na 0.0000000
## min 1.0000000
## max 100.0000000
## range 99.0000000
## sum 9886.2600000
## median 62.3500000
## mean 59.5557831
## SE.mean 1.9879785
## CI.mean.0.95 3.9251555
## var 656.0417288
## std.dev 25.6133116
## coef.var 0.4300726
##
## [[2]]
# whether participants have food allergies
my_table(working_file, allergies)
## .
## yes no
## 8 158
Next, I visualize the ratings on attractiveness and simulations, respectively. I’ll first visualize and describe them overall, then per factor level (as in two main effects of label type and food type), and then the interaction.
First, I inspect the overall attractiveness ratings, regardless of label or food type.
describe_visualize(
working_file %>%
filter(measure == "attractiveness"),
rating,
TRUE,
TRUE
)
## [[1]]
##
##
## x
## ------------- -------------
## nbr.val 6.640000e+03
## nbr.null 2.740000e+02
## nbr.na 0.000000e+00
## min 0.000000e+00
## max 1.000000e+02
## range 1.000000e+02
## sum 3.636170e+05
## median 6.070000e+01
## mean 5.476159e+01
## SE.mean 3.602866e-01
## CI.mean.0.95 7.062776e-01
## var 8.619149e+02
## std.dev 2.935839e+01
## coef.var 5.361127e-01
##
## [[2]]
Next, a raincloud plot with ratings per label type. Doesn’t look like there’s much of a difference.
rc_plot(
working_file,
"attractiveness",
"rating",
"label_type",
"Label Type",
"Raincloud Plot of Food Attractiveness Ratings"
)
# raw means and SDs (without taking grouping by PP into account)
working_file %>%
filter(measure == "attractiveness") %>%
group_by(label_type) %>%
summarize(
mean = mean(rating, na.rm = TRUE),
sd = sd(rating, na.rm = TRUE)
)
## # A tibble: 2 x 3
## label_type mean sd
## <fct> <dbl> <dbl>
## 1 control 53.1 29.4
## 2 enhanced 56.4 29.3
# means (unchanged) and SDs after aggregated per participant (to make it comparable to RM ANOVA)
# first average ratings within each pp x label_type cell, then summarize over pps
working_file %>%
filter(measure == "attractiveness") %>%
group_by(pp, label_type) %>%
summarize(mean_agg = mean(rating, na.rm = TRUE)) %>%
ungroup() %>%
group_by(label_type) %>%
summarize(
mean = mean(mean_agg),
sd = sd(mean_agg)
)
## # A tibble: 2 x 3
## label_type mean sd
## <fct> <dbl> <dbl>
## 1 control 53.1 12.4
## 2 enhanced 56.4 13.4
Let’s inspect the ratings per food type. People seem to like meat-based labels more.
# raincloud plot of attractiveness ratings, split by food type
rc_plot(
working_file,
"attractiveness",
"rating",
"food_type",
"Food Type",
"Raincloud Plot of Food Attractiveness Ratings"
)
# raw means and SDs per food type (trials pooled over participants)
working_file %>%
filter(measure == "attractiveness") %>%
group_by(food_type) %>%
summarize(
mean = mean(rating, na.rm = TRUE),
sd = sd(rating, na.rm = TRUE)
)
## # A tibble: 2 x 3
## food_type mean sd
## <fct> <dbl> <dbl>
## 1 meat_based 61.3 28.0
## 2 plant_based 48.2 29.2
Next, I inspect the interaction of label type and food type for attractiveness ratings. The raincloud plots are not that easy to read (even if I add means with CIs), so for easier visualization I use a line plot on the aggregated means with within-subject standard error (from the Rmisc::summarySEwithin function).
# raincloud plot of the label type x food type interaction for attractiveness
# (last argument facets by food_type)
rc_plot(
working_file,
"attractiveness",
"rating",
"label_type",
"Label Type",
"Raincloud Plot of Food Attractiveness Ratings",
"food_type"
)
# get summary statistics with within-subject error (Rmisc::summarySEwithin):
# aggregate per pp x label_type x food_type cell first, then compute
# Morey-corrected within-subject SE/CI per condition
summary_attr <-
summarySEwithin(
data = working_file %>%
filter(measure == "attractiveness") %>%
group_by(pp, label_type, food_type) %>%
summarize(rating = mean(rating, na.rm = TRUE)),
measurevar = "rating",
withinvars = c("label_type", "food_type"),
idvar = "pp"
)
# have a look at means and SDs
summary_attr
## label_type food_type N rating sd se ci
## 1 control meat_based 166 59.78544 11.93978 0.9267069 1.829732
## 2 control plant_based 166 46.40062 11.97610 0.9295257 1.835298
## 3 enhanced meat_based 166 62.87654 11.57998 0.8987805 1.774593
## 4 enhanced plant_based 166 49.98378 10.98582 0.8526650 1.683541
# line graph of the aggregated cell means (easier to read than the rainclouds)
line_plot(
summary_attr,
"label_type",
rating,
"food_type"
)
First, I inspect the overall simulations ratings, regardless of label or food type.
# summary statistics + density plot for ALL simulations ratings
describe_visualize(
working_file %>%
filter(measure == "simulations"),
rating,
TRUE, # repeated_measure
TRUE # want_summary
)
## [[1]]
##
##
## x
## ------------- -------------
## nbr.val 6.638000e+03
## nbr.null 1.070000e+02
## nbr.na 2.000000e+00
## min 0.000000e+00
## max 1.000000e+02
## range 1.000000e+02
## sum 3.915712e+05
## median 6.394000e+01
## mean 5.898934e+01
## SE.mean 3.410187e-01
## CI.mean.0.95 6.685062e-01
## var 7.719578e+02
## std.dev 2.778413e+01
## coef.var 4.710026e-01
##
## [[2]]
Next, a raincloud plot with simulations ratings per label type. Doesn’t look like there’s much of a difference.
# raincloud plot of simulations ratings, split by label type
rc_plot(
working_file,
"simulations",
"rating",
"label_type",
"Label Type",
"Raincloud Plot of Food Simulations Ratings"
)
# raw means and SDs (trials pooled over participants)
working_file %>%
filter(measure == "simulations") %>%
group_by(label_type) %>%
summarize(
mean = mean(rating, na.rm = TRUE),
sd = sd(rating, na.rm = TRUE)
)
## # A tibble: 2 x 3
## label_type mean sd
## <fct> <dbl> <dbl>
## 1 control 55.5 28.4
## 2 enhanced 62.5 26.7
# means (unchanged) and SDs after aggregated per participant (to make it comparable to RM ANOVA)
working_file %>%
filter(measure == "simulations") %>%
group_by(pp, label_type) %>%
summarize(mean_agg = mean(rating, na.rm = TRUE)) %>%
ungroup() %>%
group_by(label_type) %>%
summarize(
mean = mean(mean_agg),
sd = sd(mean_agg)
)
## # A tibble: 2 x 3
## label_type mean sd
## <fct> <dbl> <dbl>
## 1 control 55.5 15.1
## 2 enhanced 62.5 12.9
Let’s inspect the ratings per food type. People seem to simulate more with meat-based labels.
# raincloud plot of simulations ratings, split by food type
rc_plot(
working_file,
"simulations",
"rating",
"food_type",
"Food Type",
"Raincloud Plot of Food Simulations Ratings"
)
# raw means and SDs per food type
working_file %>%
filter(measure == "simulations") %>%
group_by(food_type) %>%
summarize(
mean = mean(rating, na.rm = TRUE),
sd = sd(rating, na.rm = TRUE)
)
## # A tibble: 2 x 3
## food_type mean sd
## <fct> <dbl> <dbl>
## 1 meat_based 64.9 26.0
## 2 plant_based 53.1 28.2
Next, I inspect the interaction of label type and food type for simulations ratings with rainclouds and line graphs.
# raincloud plot of the label type x food type interaction for SIMULATIONS
# fix: the title previously said "Attractiveness" (copy-paste from the
# attractiveness section) although the data plotted here are simulations
# ratings; compare the single-factor plot title above
rc_plot(
working_file,
"simulations",
"rating",
"label_type",
"Label Type",
"Raincloud Plot of Food Simulations Ratings",
"food_type"
)
# get summary statistics with within-subject error (Rmisc::summarySEwithin),
# same aggregation approach as for attractiveness above
summary_sim <-
summarySEwithin(
data = working_file %>%
filter(measure == "simulations") %>%
group_by(pp, label_type, food_type) %>%
summarize(rating = mean(rating, na.rm = TRUE)),
measurevar = "rating",
withinvars = c("label_type", "food_type"),
idvar = "pp"
)
# have a look at Ms and SDs
summary_sim
## label_type food_type N rating sd se ci
## 1 control meat_based 166 61.85078 11.84391 0.9192660 1.815041
## 2 control plant_based 166 49.09478 11.15353 0.8656821 1.709242
## 3 enhanced meat_based 166 67.95105 11.06950 0.8591596 1.696364
## 4 enhanced plant_based 166 57.05902 12.05469 0.9356255 1.847342
# line graph of the aggregated cell means for simulations
line_plot(
summary_sim,
"label_type",
rating,
"food_type"
)
I now move to the actual analysis. I will structure the analysis section per hypothesis, and each hypothesis with a confirmatory/preregistered and an exploratory sub-section. I will note and explain deviations from the preregistrations wherever applicable. After the hypothesis sections, I’ll also describe exploratory results.
The preregistered hypothesis reads as follows:
Simulation and attractiveness will be higher for foods with enhanced compared to neutral labels.
I’ll start with the confirmatory model. Note that the preregistration was imprecise in saying that we would predict the outcome with food type and label type. What it actually means is controlling for food item as a random effect. Thus, we specify a model with maximal random effects.
Before we can do that, we need to add the individual food item as a factor to the data set. As of now, we only have the food type plus a stimulus number, which together create 40 unique food items. I’ll add them here; for a full description, see the OSM materials.
# file that contains the foods
# columns: food_type (factor), stimulus_no (number), food (factor);
# together food_type + stimulus_no identify the 40 unique food items
food_items <-
read_csv(
here("data", "study2", "food_items.csv"),
col_types = list(
col_factor(levels = NULL),
col_number(),
col_factor(levels = NULL)
)
)
head(food_items, n = 5)
## # A tibble: 5 x 3
## food_type stimulus_no food
## <fct> <dbl> <fct>
## 1 meat_based 1 chicken_balti
## 2 meat_based 2 pepperoni_pizza
## 3 meat_based 3 empanadas
## 4 meat_based 4 steak_pie
## 5 meat_based 5 pasta_bake
# attach the individual food item name to each trial by joining on
# food_type + stimulus_no (the pair that uniquely identifies a food)
working_file <-
working_file %>%
left_join(
.,
food_items,
by = c("food_type",
"stimulus_no")
) %>%
select( # reorder columns
pp:stimulus_no,
food,
everything()
)
# also add them to the file with participants who had a high RSI
# (same join + reorder as for working_file, kept in sync for later robustness checks)
high_rsi <-
high_rsi %>%
left_join(
.,
food_items,
by = c("food_type",
"stimulus_no")
) %>%
select( # reorder columns
pp:stimulus_no,
food,
everything()
)
Also, because there are two dependent variables in the data, I’ll split the working file into two analysis files, one per measure.
# split the working file into one analysis file per dependent variable;
# droplevels() removes the now-unused level of the measure factor
simulations <-
working_file %>%
filter(measure == "simulations") %>%
mutate(
measure = droplevels(measure)
)
attractiveness <-
working_file %>%
filter(measure == "attractiveness") %>%
mutate(
measure = droplevels(measure)
)
Construct the main effects model: predict simulations by label type. Because we’re only interested in the main effects, I’ll use dummy coding for the predictor; we only have two levels and Type 2 or 3 Sums of Squares will be the same. The model converges without problems.
# set contrasts
# NOTE: options() changes contrasts globally for the whole session; dummy
# (treatment) coding is fine here because the single predictor has two levels
options(contrasts = c("contr.treatment", "contr.poly"))
# construct model: rating predicted by label type, with maximal random
# effects (random intercept + label_type slope for participants and foods)
h1_sim <- lme4::lmer(
rating ~
label_type +
(1 + label_type | pp) +
(1 + label_type | food),
simulations
)
# inspect model
summary(h1_sim)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ label_type + (1 + label_type | pp) + (1 + label_type |
## food)
## Data: simulations
##
## REML criterion at convergence: 61175.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4229 -0.6460 0.1250 0.6867 2.6661
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 199.15 14.112
## label_typeenhanced 102.04 10.101 -0.56
## food (Intercept) 70.54 8.399
## label_typeenhanced 15.20 3.899 -0.42
## Residual 528.00 22.978
## Number of obs: 6638, groups: pp, 166; food, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 55.480 1.767 31.398
## label_typeenhanced 7.018 1.146 6.124
##
## Correlation of Fixed Effects:
## (Intr)
## lbl_typnhnc -0.486
Now let’s inspect model diagnostics. The residuals look normally distributed; the proportion of large residuals is fine; the Q-Q-plot also looks fine.
However, there are clear cut-offs in the predicted vs. residuals plot. That’s because the outcome variable has (0 | 100) bounds. For example, a fitted value of 50 can be off by a maximum of -50 or 50, not lower or higher. Usually, this pattern happens with binomial distributions. However, I don’t think using a binomial model would be appropriate here: a) the residuals are normally distributed; b) I would argue that a 100 rating slider is conceptually rather different than a proportion (I wouldn’t know how to count successes and failures for a binomial distribution). Therefore, I will maintain the linear model, because I believe it makes sense conceptually and displays decent fit.
Moreover, two participants stand out as formal outliers: pp_91 and pp_48. For foods, there’s one stimulus with a large influence: vegan_fillets. I’ll check whether the model is robust to their exclusion.
# inspect standardized residuals (density should look roughly normal)
densityplot(resid(h1_sim, scaled = TRUE))
# q-q plot (car::qqPlot also prints the indices of the most extreme points)
car::qqPlot(resid(h1_sim, scaled = TRUE))
## [1] 3469 3542
# proportion of residuals in the extremes of the distribution
# (custom helper; compares observed proportions beyond 2/2.5/3 SD to cutoffs)
lmer_residuals(h1_sim)
## # A tibble: 3 x 4
## standardized_cutoff proportion_cutoff proportion below_cutoff
## <dbl> <dbl> <dbl> <lgl>
## 1 2 0.05 0.0375 TRUE
## 2 2.5 0.01 0.0104 FALSE
## 3 3 0.0001 0.000904 FALSE
# obtain influence measures per grouping factor (leave-one-out refits)
h1_sim_outliers_pp <- influence.ME::influence(h1_sim, c("pp"))
h1_sim_outliers_food <- influence.ME::influence(h1_sim, c("food"))
# plot formal outliers for pp: Cook's distance plus DFBETAS per fixed effect
plot(h1_sim_outliers_pp, which = 'cook')
plot(h1_sim_outliers_pp, which = 'dfbetas')[1]
plot(h1_sim_outliers_pp, which = 'dfbetas')[2]
# which ones are the highest
arrange_cook(h1_sim_outliers_pp, "pp")
## # A tibble: 6 x 2
## pp cooks_distance
## <chr> <dbl>
## 1 pp_91 0.0392
## 2 pp_48 0.0262
## 3 pp_103 0.0179
## 4 pp_109 0.0168
## 5 pp_143 0.0142
## 6 pp_120 0.0137
# plot formal outliers for food (same diagnostics as for pp)
plot(h1_sim_outliers_food, which = 'cook')
plot(h1_sim_outliers_food, which = 'dfbetas')[1]
plot(h1_sim_outliers_food, which = 'dfbetas')[2]
# which ones are so far out on cook's distance?
arrange_cook(h1_sim_outliers_food, "food")
## # A tibble: 6 x 2
## food cooks_distance
## <chr> <dbl>
## 1 vegan_fillets 0.0825
## 2 veggie_burger 0.0447
## 3 mushroom_burrito 0.0377
## 4 pasta_bake 0.0362
## 5 fishless_cakes 0.0314
## 6 mint_soup 0.0305
# plot the fitted vs. the residuals (to check for homo/heteroskedasticity) with added smoothed line.
plot(h1_sim, type = c('p', 'smooth'))
The effect is highly significant.
# get p-value via afex::mixed (Satterthwaite approximation, Type 3 tests);
# check_contrasts = FALSE keeps the treatment coding set above
h1_sim_p <-
mixed(
rating ~
label_type +
(1 + label_type | pp) +
(1 + label_type | food),
simulations,
type = 3,
method = "S",
check_contrasts = FALSE
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# display the Type 3 test for the label_type effect
anova(h1_sim_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ label_type + (1 + label_type | pp) + (1 + label_type |
## Model: food)
## Data: simulations
## num Df den Df F Pr(>F)
## label_type 1 95.131 37.509 2.037e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# approximate effect size: marginal (fixed effects only) and
# conditional (fixed + random) R^2 per MuMIn::r.squaredGLMM
r.squaredGLMM(h1_sim)
## R2m R2c
## [1,] 0.01589684 0.3184168
Next, I run the same model, but with attractiveness as outcome.
# construct model: same maximal specification, now with attractiveness as outcome
h1_attr <- lme4::lmer(
rating ~
label_type +
(1 + label_type | pp) +
(1 + label_type | food),
attractiveness
)
# inspect model
summary(h1_attr)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ label_type + (1 + label_type | pp) + (1 + label_type |
## food)
## Data: attractiveness
##
## REML criterion at convergence: 62303
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9459 -0.7134 0.1018 0.7232 2.6304
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 121.51 11.023
## label_typeenhanced 23.35 4.832 0.03
## food (Intercept) 98.44 9.922
## label_typeenhanced 20.23 4.498 -0.45
## Residual 639.31 25.285
## Number of obs: 6640, groups: pp, 166; food, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 53.098 1.840 28.858
## label_typeenhanced 3.329 1.016 3.278
##
## Correlation of Fixed Effects:
## (Intr)
## lbl_typnhnc -0.367
Let’s inspect model diagnostics. All of them look good, and there’s only one clear outlier: veggie_burger.
# inspect standardized residuals for the attractiveness model
densityplot(resid(h1_attr, scaled = TRUE))
# q-q plot
car::qqPlot(resid(h1_attr, scaled = TRUE))
## [1] 2797 2799
# proportion of residuals in the extremes of the distribution
lmer_residuals(h1_attr)
## # A tibble: 3 x 4
## standardized_cutoff proportion_cutoff proportion below_cutoff
## <dbl> <dbl> <dbl> <lgl>
## 1 2 0.05 0.0330 TRUE
## 2 2.5 0.01 0.00542 TRUE
## 3 3 0.0001 0 TRUE
# obtain influence measures (leave-one-out refits per grouping factor)
h1_attr_outliers_pp <- influence.ME::influence(h1_attr, c("pp"))
h1_attr_outliers_food <- influence.ME::influence(h1_attr, c("food"))
# plot formal outliers for pp
plot(h1_attr_outliers_pp, which = 'cook')
plot(h1_attr_outliers_pp, which = 'dfbetas')[1]
plot(h1_attr_outliers_pp, which = 'dfbetas')[2]
# which ones are the highest
arrange_cook(h1_attr_outliers_pp, "pp")
## # A tibble: 6 x 2
## pp cooks_distance
## <chr> <dbl>
## 1 pp_59 0.0168
## 2 pp_181 0.0164
## 3 pp_115 0.0139
## 4 pp_18 0.0132
## 5 pp_41 0.0124
## 6 pp_126 0.0114
# plot formal outliers for food
plot(h1_attr_outliers_food, which = 'cook')
plot(h1_attr_outliers_food, which = 'dfbetas')[1]
plot(h1_attr_outliers_food, which = 'dfbetas')[2]
# which ones are so far out on cook's distance?
arrange_cook(h1_attr_outliers_food, "food")
## # A tibble: 6 x 2
## food cooks_distance
## <chr> <dbl>
## 1 veggie_burger 0.212
## 2 noddles_veggies 0.0744
## 3 steam_bags 0.0485
## 4 spring_rolls 0.0390
## 5 vegan_fillets 0.0384
## 6 fishless_cakes 0.0373
# plot the fitted vs. the residuals (to check for homo/heteroskedasticity) with added smoothed line.
plot(h1_attr, type = c('p', 'smooth'))
Although the difference is quite small in absolute terms, it’s still significant.
# get p-value (Satterthwaite, Type 3), attractiveness outcome
h1_attr_p <-
mixed(
rating ~
label_type +
(1 + label_type | pp) +
(1 + label_type | food),
attractiveness,
type = 3,
method = "S",
check_contrasts = FALSE
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# display the Type 3 test for the label_type effect
anova(h1_attr_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ label_type + (1 + label_type | pp) + (1 + label_type |
## Model: food)
## Data: attractiveness
## num Df den Df F Pr(>F)
## label_type 1 47.57 10.742 0.00196 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# approximate effect size (marginal / conditional R^2)
r.squaredGLMM(h1_attr)
## R2m R2c
## [1,] 0.003202627 0.261087
Here, I conduct several checks to see whether the effects are robust to control variables and outlier exclusion.
I start with including the participants with a high RSI that we excluded above. Their exclusion was not preregistered.
The simulation model did not converge. I tried all current best practices:
In the end, only simplifying the model worked (aka removing the intercept for food). Under this condition, the effect remained significant. For attractiveness, the model converges and shows to be robust to the inclusion of those cases.
# get p-value for simulations, re-including high-RSI participants;
# random intercept for food removed ((0 + label_type | food)) because the
# full model did not converge with these cases included
h1_sim_p_rsi <-
mixed(
rating ~
label_type +
(1 + label_type | pp) +
(0 + label_type | food),
data = full_join(
simulations,
high_rsi %>%
filter(measure == "simulations")
),
type = 3,
method = "S",
check_contrasts = FALSE
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h1_sim_p_rsi)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ label_type + (1 + label_type | pp) + (0 + label_type |
## Model: food)
## Data: full_join
## Data: simulations
## Data: high_rsi %>% filter(measure == "simulations")
## num Df den Df F Pr(>F)
## label_type 1 100.65 34.901 4.744e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# get p-value for attractiveness with high-RSI cases included
# (here the full maximal model converges, so no simplification needed)
h1_attr_p_rsi <-
mixed(
rating ~
label_type +
(1 + label_type | pp) +
(1 + label_type | food),
data = full_join( # add high rsi cases
attractiveness,
high_rsi %>%
filter(measure == "attractiveness")
),
type = 3,
method = "S",
check_contrasts = FALSE
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h1_attr_p_rsi)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ label_type + (1 + label_type | pp) + (1 + label_type |
## Model: food)
## Data: full_join
## Data: attractiveness
## Data: high_rsi %>% filter(measure == "attractiveness")
## num Df den Df F Pr(>F)
## label_type 1 47.86 10.742 0.001954 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
As a next step, I see whether the results are robust to the exclusion of the outliers I identified above. I start with the outliers on simulations: the effect remains significant.
# get p-value after excluding the influential participants (pp_91, pp_48)
# and the influential food (vegan_fillets) identified in the diagnostics
h1_sim_no_outliers <-
mixed(
rating ~
label_type +
(1 + label_type | pp) +
(1 + label_type | food),
simulations %>%
filter(
!pp %in% c("pp_91", "pp_48"),
food != "vegan_fillets"
),
type = 3,
method = "S",
check_contrasts = FALSE
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# Type 3 test after outlier exclusion
anova(h1_sim_no_outliers)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ label_type + (1 + label_type | pp) + (1 + label_type |
## Model: food)
## Data: %>%
## Data: simulations
## Data: filter(!pp %in% c("pp_91", "pp_48"), food != "vegan_fillets")
## num Df den Df F Pr(>F)
## label_type 1 80.682 37.94 2.701e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Next, I remove the outliers on attractiveness: the effect remains robust.
# get p-value after excluding the single influential food (veggie_burger)
h1_attr_no_outliers <-
mixed(
rating ~
label_type +
(1 + label_type | pp) +
(1 + label_type | food),
attractiveness %>%
filter(food != "veggie_burger"),
type = 3,
method = "S",
check_contrasts = FALSE
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# Type 3 test after outlier exclusion
anova(h1_attr_no_outliers)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ label_type + (1 + label_type | pp) + (1 + label_type |
## Model: food)
## Data: %>%
## Data: attractiveness
## Data: filter(food != "veggie_burger")
## num Df den Df F Pr(>F)
## label_type 1 46.671 10.201 0.002515 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
As a last robustness check, I include two control variables, namely the attitude toward vegan/plant-based foods and the frequency of eating meat. For the former, we preregistered to take the mean of those two attitude variables.
# Preregistered composite: the mean of the two attitude items
# (attitude toward plant-based food and attitude toward vegan food).
# Fix: the simulations version previously wrapped the sum in c(), which is
# a no-op for a single vector but reads as if concatenation were intended;
# grouping parentheses (as in the attractiveness version) make the order of
# operations explicit and keep both pipelines identical.
simulations <-
simulations %>%
mutate(
attitude_pb_vegan = (attitude_pb + attitude_vegan) / 2
)
attractiveness <-
attractiveness %>%
mutate(
attitude_pb_vegan = (attitude_pb + attitude_vegan) / 2
)
Now, we include this average attitude and the frequency of eating meat as covariates. Because they are continuous, I’ll grand-mean center them to make it easier to interpret their estimate.
Apparently, attitudes toward plant-based/vegan food has a positive association with simulations, whereas frequency of eating meat has a negative association, regardless of label type. However, none of these associations are significant. The effect of label type is robust to the inclusion of these covariates.
In contrast, attitude has a positive, significant association with attractiveness. The effect of label type remains significant.
# grand-mean center the two covariates; mutate_at with a named list creates
# new columns suffixed with "_c" (attitude_pb_vegan_c, meat_per_week_c)
# NOTE(review): scale() returns a one-column matrix rather than a plain
# vector — lmer accepts this, but worth confirming it causes no issues downstream
simulations <-
simulations %>%
mutate_at(
vars(attitude_pb_vegan, meat_per_week),
list(c = ~ scale(., center = TRUE, scale = FALSE)) # create new columns with _c as suffix
)
attractiveness <-
attractiveness %>%
mutate_at(
vars(attitude_pb_vegan, meat_per_week),
list(c = ~ scale(., center = TRUE, scale = FALSE))
)
# get descriptives for the (uncentered) attitude composite, one row per pp
describe_visualize(attractiveness,
attitude_pb_vegan,
FALSE,
TRUE)
## [[1]]
##
##
## x
## ------------- -------------
## nbr.val 166.0000000
## nbr.null 0.0000000
## nbr.na 0.0000000
## min 1.0000000
## max 100.0000000
## range 99.0000000
## sum 9456.0300000
## median 57.6450000
## mean 56.9640361
## SE.mean 1.9082645
## CI.mean.0.95 3.7677646
## var 604.4846136
## std.dev 24.5862688
## coef.var 0.4316104
##
## [[2]]
# fit model with the centered covariates added as fixed effects
h1_sim_covariates_p <-
mixed(
rating ~
label_type +
attitude_pb_vegan_c +
meat_per_week_c +
(1 + label_type | pp) +
(1 + label_type | food),
simulations,
type = 3,
method = "S",
check_contrasts = FALSE
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# Type 3 tests for label_type and the two covariates
anova(h1_sim_covariates_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ label_type + attitude_pb_vegan_c + meat_per_week_c +
## Model: (1 + label_type | pp) + (1 + label_type | food)
## Data: simulations
## num Df den Df F Pr(>F)
## label_type 1 95.036 37.4975 2.051e-08 ***
## attitude_pb_vegan_c 1 162.851 3.4540 0.06491 .
## meat_per_week_c 1 162.853 0.1353 0.71343
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# same covariate model, now with attractiveness as outcome
h1_attr_covariates_p <-
mixed(
rating ~
label_type +
attitude_pb_vegan_c +
meat_per_week_c +
(1 + label_type | pp) +
(1 + label_type | food),
attractiveness,
type = 3,
method = "S",
check_contrasts = FALSE
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# Type 3 tests for label_type and the two covariates
anova(h1_attr_covariates_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ label_type + attitude_pb_vegan_c + meat_per_week_c +
## Model: (1 + label_type | pp) + (1 + label_type | food)
## Data: attractiveness
## num Df den Df F Pr(>F)
## label_type 1 47.58 10.7438 0.0019589 **
## attitude_pb_vegan_c 1 162.97 14.8507 0.0001671 ***
## meat_per_week_c 1 162.97 0.4095 0.5231183
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The preregistered hypothesis reads as follows: >This effect [of label types] may be stronger for plant-based food than for meat-based food.
Again, I’ll separate into confirmatory and exploratory sections.
Each combination of label type and food type was present for each participant, meaning that we should include a random effect for the interaction of label type and food type for pp as grouping factor.
For the food grouping, each food item could only be plant-based or meat-based, meaning we cannot include a random slope of food_type. That also means we cannot model the interaction. We counterbalanced which food was assigned what label type across participants. Therefore, we will only model label_type as random slope for the food grouping.
To obtain p-values later, for interactions and Type III tests, we need sum-to-zero contrasts.
# set contrasts: sum-to-zero coding is required for interpretable Type 3
# tests of the interaction (changes contrasts globally)
options(contrasts = c("contr.sum", "contr.poly"))
# run the model: full label_type x food_type interaction for pp, but only a
# label_type slope for food (a food can only be one food type; see text above)
h2_sim <-
lme4::lmer(
rating ~
label_type * food_type +
(1 + label_type * food_type | pp) +
(1 + label_type | food),
simulations
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0815543
## (tol = 0.002, component 1)
summary(h2_sim)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ label_type * food_type + (1 + label_type * food_type |
## pp) + (1 + label_type | food)
## Data: simulations
##
## REML criterion at convergence: 60895.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5354 -0.6100 0.1119 0.6607 3.1025
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 145.4374 12.0597
## label_type1 26.4819 5.1461 0.24
## food_type1 36.3587 6.0298 -0.13 0.11
## label_type1:food_type1 0.0455 0.2133 -0.15 -0.65 -0.27
## food (Intercept) 25.6014 5.0598
## label_type1 3.8378 1.9590 0.02
## Residual 489.5949 22.1268
## Number of obs: 6638, groups: pp, 166; food, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 58.9897 1.2609 46.783
## label_type1 -3.5101 0.5738 -6.117
## food_type1 5.9134 0.9658 6.123
## label_type1:food_type1 0.4667 0.4123 1.132
##
## Correlation of Fixed Effects:
## (Intr) lbl_t1 fd_ty1
## label_type1 0.127
## food_type1 -0.047 0.036
## lbl_typ1:_1 -0.005 -0.018 0.005
## convergence code: 0
## Model failed to converge with max|grad| = 0.0815543 (tol = 0.002, component 1)
The model throws a convergence error. Looking at the summary, nothing really sticks out. Maybe some of the correlations are extremely low, so that might be problematic. Actually, since the newest update of lme4, the model fitting procedure has become more conservative. So I will follow the advice of Barr et al. (2013) and see whether I can get the warning to disappear.
I will start with simply increasing the number of iterations.
# convergence troubleshooting step 1: refit with a much larger
# iteration budget for the optimizer
h2_sim.2 <-
lme4::lmer(
rating ~
label_type * food_type +
(1 + label_type * food_type | pp) +
(1 + label_type | food),
simulations,
control = lmerControl(optCtrl = list(maxfun = 1e9))
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0815543
## (tol = 0.002, component 1)
The model still gives a convergence error. Next, I will restart from previous fit.
# convergence troubleshooting step 2: restart the optimizer from the
# previous fit's parameter estimates (theta + fixed effects)
starting_point <- getME(h2_sim.2, c("theta", "fixef"))
h2_sim.3 <- update(h2_sim.2,
start = starting_point,
control = lmerControl(optCtrl = list(maxfun = 1e9)))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0112122
## (tol = 0.002, component 1)
Next, I’ll try out different optimizers. About half of the optimizers lead to a converging model; the ones that converge yield a singular fit warning. Newer versions of lme4 throw this warning quite often, because the estimation has become more conservative. Overall, the fixed effects are (almost) identical, which gives confidence in the model. The same goes for the random effects, except for one correlation with bobyqa as optimizer, which would explain the singularity warning. Overall, I believe we can trust the estimates and treat the convergence warning as a false positive.
# convergence troubleshooting step 3: refit with all available optimizers
h2_sim_model_all <- afex::all_fit(h2_sim.3,
maxfun = 1e9)
## bobyqa. : [OK]
## Nelder_Mead. : [OK]
## optimx.nlminb : [ERROR]
## optimx.L-BFGS-B : [ERROR]
## nloptwrap.NLOPT_LN_NELDERMEAD : [OK]
## nloptwrap.NLOPT_LN_BOBYQA : [OK]
## nmkbw. : [ERROR]
# which optimizers returned a valid merMod fit
# vapply() instead of sapply(): guarantees a logical vector regardless of
# input (sapply's return type depends on its input), per R best practice
ok_fits <- vapply(h2_sim_model_all, is, logical(1), "merMod")
ok_fits
## bobyqa. Nelder_Mead.
## TRUE TRUE
## optimx.nlminb optimx.L-BFGS-B
## FALSE FALSE
## nloptwrap.NLOPT_LN_NELDERMEAD nloptwrap.NLOPT_LN_BOBYQA
## TRUE TRUE
## nmkbw.
## FALSE
# was fit okay? TRUE = the optimizer did not error out entirely
# (vapply with an explicit logical(1) template is safer than sapply here)
!vapply(h2_sim_model_all, inherits, logical(1), "try-error")
## bobyqa. Nelder_Mead.
## TRUE TRUE
## optimx.nlminb optimx.L-BFGS-B
## TRUE TRUE
## nloptwrap.NLOPT_LN_NELDERMEAD nloptwrap.NLOPT_LN_BOBYQA
## TRUE TRUE
## nmkbw.
## TRUE
# compare fixed effects across the converged fits
# (near-identical estimates suggest the warning is a false positive)
ok_fits_details <- h2_sim_model_all[ok_fits]
t(sapply(ok_fits_details, "fixef"))
## (Intercept) label_type1 food_type1
## bobyqa. 58.98971 -3.51006 5.913429
## Nelder_Mead. 58.98971 -3.51006 5.913429
## nloptwrap.NLOPT_LN_NELDERMEAD 58.98971 -3.51006 5.913429
## nloptwrap.NLOPT_LN_BOBYQA 58.98971 -3.51006 5.913429
## label_type1:food_type1
## bobyqa. 0.4666743
## Nelder_Mead. 0.4666743
## nloptwrap.NLOPT_LN_NELDERMEAD 0.4666722
## nloptwrap.NLOPT_LN_BOBYQA 0.4666722
# compare random effects (theta = relative covariance parameters) across fits
t(sapply(ok_fits_details, getME, "theta"))
## pp.(Intercept) pp.label_type1.(Intercept)
## bobyqa. 0.5448175 0.05518278
## Nelder_Mead. 0.5448158 0.05518189
## nloptwrap.NLOPT_LN_NELDERMEAD 0.5447007 0.05508740
## nloptwrap.NLOPT_LN_BOBYQA 0.5447007 0.05508740
## pp.food_type1.(Intercept)
## bobyqa. -0.03566608
## Nelder_Mead. -0.03566500
## nloptwrap.NLOPT_LN_NELDERMEAD -0.03573493
## nloptwrap.NLOPT_LN_BOBYQA -0.03573493
## pp.label_type1:food_type1.(Intercept)
## bobyqa. -0.001472848
## Nelder_Mead. -0.001473771
## nloptwrap.NLOPT_LN_NELDERMEAD -0.001368736
## nloptwrap.NLOPT_LN_BOBYQA -0.001368736
## pp.label_type1 pp.food_type1.label_type1
## bobyqa. 0.2260010 0.03880769
## Nelder_Mead. 0.2260027 0.03880198
## nloptwrap.NLOPT_LN_NELDERMEAD 0.2259277 0.03880994
## nloptwrap.NLOPT_LN_BOBYQA 0.2259277 0.03880994
## pp.label_type1:food_type1.label_type1
## bobyqa. -0.006057909
## Nelder_Mead. -0.006058712
## nloptwrap.NLOPT_LN_NELDERMEAD -0.006141224
## nloptwrap.NLOPT_LN_BOBYQA -0.006141224
## pp.food_type1
## bobyqa. 0.2674025
## Nelder_Mead. 0.2674005
## nloptwrap.NLOPT_LN_NELDERMEAD 0.2673442
## nloptwrap.NLOPT_LN_BOBYQA 0.2673442
## pp.label_type1:food_type1.food_type1
## bobyqa. -0.001787103
## Nelder_Mead. -0.001787343
## nloptwrap.NLOPT_LN_NELDERMEAD -0.001774820
## nloptwrap.NLOPT_LN_BOBYQA -0.001774820
## pp.label_type1:food_type1 food.(Intercept)
## bobyqa. 0.000000e+00 0.2288721
## Nelder_Mead. 3.034817e-05 0.2288742
## nloptwrap.NLOPT_LN_NELDERMEAD 6.067120e-04 0.2288415
## nloptwrap.NLOPT_LN_BOBYQA 6.067120e-04 0.2288415
## food.label_type1.(Intercept)
## bobyqa. 0.001444044
## Nelder_Mead. 0.001444658
## nloptwrap.NLOPT_LN_NELDERMEAD 0.001472892
## nloptwrap.NLOPT_LN_BOBYQA 0.001472892
## food.label_type1
## bobyqa. 0.08877145
## Nelder_Mead. 0.08877055
## nloptwrap.NLOPT_LN_NELDERMEAD 0.08878641
## nloptwrap.NLOPT_LN_BOBYQA 0.08878641
Let’s inspect model diagnostics. They look good, with mostly normal residuals and a linear overall prediction. As for outliers, pp_91 and vegan_fillets stand out a bit, so I will see whether the results hold up with their exclusion.
# inspect standardized residuals for the interaction model
densityplot(resid(h2_sim, scaled = TRUE))
# q-q plot
car::qqPlot(resid(h2_sim, scaled = TRUE))
## 3469 6513
## 3469 6511
# proportion of residuals in the extremes of the distribution
lmer_residuals(h2_sim)
## # A tibble: 3 x 4
## standardized_cutoff proportion_cutoff proportion below_cutoff
## <dbl> <dbl> <dbl> <lgl>
## 1 2 0.05 0.0422 TRUE
## 2 2.5 0.01 0.0128 FALSE
## 3 3 0.0001 0.00286 FALSE
# obtain influence measures per grouping factor
h2_sim_outliers_pp <- influence.ME::influence(h2_sim, c("pp"))
h2_sim_outliers_food <- influence.ME::influence(h2_sim, c("food"))
# plot formal outliers for pp: Cook's distance plus DFBETAS for each of the
# four fixed effects (intercept, two main effects, interaction)
plot(h2_sim_outliers_pp, which = 'cook')
plot(h2_sim_outliers_pp, which = 'dfbetas')[1]
plot(h2_sim_outliers_pp, which = 'dfbetas')[2]
plot(h2_sim_outliers_pp, which = 'dfbetas')[3]
plot(h2_sim_outliers_pp, which = 'dfbetas')[4]
# which ones are the highest
arrange_cook(h2_sim_outliers_pp, "pp")
## # A tibble: 6 x 2
## pp cooks_distance
## <chr> <dbl>
## 1 pp_91 0.0206
## 2 pp_103 0.0138
## 3 pp_48 0.0136
## 4 pp_12 0.0102
## 5 pp_71 0.00854
## 6 pp_109 0.00815
# plot formal outliers for food (same four DFBETAS panels as for pp)
plot(h2_sim_outliers_food, which = 'cook')
plot(h2_sim_outliers_food, which = 'dfbetas')[1]
plot(h2_sim_outliers_food, which = 'dfbetas')[2]
plot(h2_sim_outliers_food, which = 'dfbetas')[3]
plot(h2_sim_outliers_food, which = 'dfbetas')[4]
# which ones are so far out on cook's distance?
arrange_cook(h2_sim_outliers_food, "food")
## # A tibble: 6 x 2
## food cooks_distance
## <chr> <dbl>
## 1 vegan_fillets 0.0837
## 2 spring_rolls 0.0625
## 3 veggie_burger 0.0591
## 4 mushroom_burrito 0.0459
## 5 mint_soup 0.0429
## 6 veggie_quiche 0.0428
# plot the fitted vs. the residuals (to check for homo/heteroskedasticity) with added smoothed line.
plot(h2_sim, type = c('p', 'smooth'))
Unsurprisingly, we find two significant main effects, but no interaction: enhanced labels lead to more simulations; meat-based foods lead to more simulations; but the effect of label does not differ for food types.
# re-assert sum-to-zero contrasts (required for Type 3 tests with interactions)
options(contrasts = c("contr.sum", "contr.poly"))
# get p-values for both main effects and the interaction
h2_sim_p <-
mixed(
rating ~
label_type * food_type +
(1 + label_type * food_type | pp) +
(1 + label_type | food),
simulations,
type = 3,
method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h2_sim_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ label_type * food_type + (1 + label_type * food_type |
## Model: pp) + (1 + label_type | food)
## Data: simulations
## num Df den Df F Pr(>F)
## label_type 1 96.814 37.4192 2.016e-08 ***
## food_type 1 62.301 37.4865 6.794e-08 ***
## label_type:food_type 1 36.309 1.2812 0.2651
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# effect size approximation
r.squaredGLMM(h2_sim)
## R2m R2c
## [1,] 0.06133328 0.3681661
We run the same maximal model as with simulations. Unsurprisingly, the model yields a convergence error. I’ll go through the same trouble shooting steps as above.
# set contrasts
options(contrasts = c("contr.sum", "contr.poly"))
# run the model
h2_attr <-
lme4::lmer(
rating ~
label_type * food_type +
(1 + label_type * food_type | pp) +
(1 + label_type | food),
attractiveness
)
summary(h2_attr)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ label_type * food_type + (1 + label_type * food_type |
## pp) + (1 + label_type | food)
## Data: attractiveness
##
## REML criterion at convergence: 61971.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4478 -0.6575 0.1043 0.6899 3.2029
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 130.3474 11.4170
## label_type1 7.2060 2.6844 -0.21
## food_type1 50.5751 7.1116 -0.09 -0.06
## label_type1:food_type1 0.3916 0.6257 0.14 0.64 0.64
## food (Intercept) 40.6063 6.3723
## label_type1 5.5472 2.3553 0.28
## Residual 585.3333 24.1937
## Number of obs: 6640, groups: pp, 166; food, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 54.7621 1.3742 39.849
## label_type1 -1.6648 0.5199 -3.202
## food_type1 6.5680 1.1866 5.535
## label_type1:food_type1 0.1223 0.4788 0.255
##
## Correlation of Fixed Effects:
## (Intr) lbl_t1 fd_ty1
## label_type1 0.094
## food_type1 -0.027 -0.012
## lbl_typ1:_1 0.009 0.026 0.218
## convergence code: 0
## Model failed to converge with max|grad| = 0.0284037 (tol = 0.002, component 1)
Increasing the number of iterations still yields a warning.
h2_attr.2 <-
lme4::lmer(
rating ~
label_type * food_type +
(1 + label_type * food_type | pp) +
(1 + label_type | food),
attractiveness,
control = lmerControl(optCtrl = list(maxfun = 1e9))
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0284037
## (tol = 0.002, component 1)
Restarting from previous fit does the same.
# Restart the optimization from the previous fit's parameter estimates
# (theta = random-effect parameters, fixef = fixed-effect estimates).
# BUG FIX: the original code called update(h2_sim.2, ...) — the
# simulations model — instead of h2_attr.2, the attractiveness model
# this section is about. As a result h2_attr.3 (and the all_fit optimizer
# comparison built from it) was actually a refit of the simulations model
# with attractiveness starting values: note that the all_fit fixed effects
# below (intercept ~58.99, label_type1 ~ -3.51) do not match h2_attr's
# estimates (54.76, -1.66). The transcript output following this chunk
# was produced by the buggy call and should be regenerated.
starting_point <- getME(h2_attr.2, c("theta", "fixef"))
h2_attr.3 <- update(h2_attr.2,
                    start = starting_point,
                    control = lmerControl(optCtrl = list(maxfun = 1e9)))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0177895
## (tol = 0.002, component 1)
Next, I’ll try out different optimizers. The story is similar to the interaction model with simulations. The fixed effects are almost identical. There are slight differences in some of the random effects, but these seem minimal. Once again, I believe we can regard the convergence error as a false-positive.
h2_attr_model_all <- afex::all_fit(h2_attr.3,
maxfun = 1e9)
## bobyqa. : [OK]
## Nelder_Mead. : [OK]
## optimx.nlminb : [ERROR]
## optimx.L-BFGS-B : [ERROR]
## nloptwrap.NLOPT_LN_NELDERMEAD : [OK]
## nloptwrap.NLOPT_LN_BOBYQA : [OK]
## nmkbw. : [ERROR]
# which ones converged
ok_fits <- sapply(h2_attr_model_all, is, "merMod")
ok_fits
## bobyqa. Nelder_Mead.
## TRUE TRUE
## optimx.nlminb optimx.L-BFGS-B
## FALSE FALSE
## nloptwrap.NLOPT_LN_NELDERMEAD nloptwrap.NLOPT_LN_BOBYQA
## TRUE TRUE
## nmkbw.
## FALSE
# was fit okay?
!sapply(h2_attr_model_all, inherits, "try-error")
## bobyqa. Nelder_Mead.
## TRUE TRUE
## optimx.nlminb optimx.L-BFGS-B
## TRUE TRUE
## nloptwrap.NLOPT_LN_NELDERMEAD nloptwrap.NLOPT_LN_BOBYQA
## TRUE TRUE
## nmkbw.
## TRUE
# compare fixed effects
ok_fits_details <- h2_attr_model_all[ok_fits]
t(sapply(ok_fits_details, "fixef"))
## (Intercept) label_type1 food_type1
## bobyqa. 58.98971 -3.510060 5.913429
## Nelder_Mead. 58.98971 -3.510057 5.913437
## nloptwrap.NLOPT_LN_NELDERMEAD 58.98970 -3.510062 5.913428
## nloptwrap.NLOPT_LN_BOBYQA 58.98970 -3.510062 5.913428
## label_type1:food_type1
## bobyqa. 0.4666743
## Nelder_Mead. 0.4666624
## nloptwrap.NLOPT_LN_NELDERMEAD 0.4666713
## nloptwrap.NLOPT_LN_BOBYQA 0.4666713
# compare random effects
t(sapply(ok_fits_details, getME, "theta"))
## pp.(Intercept) pp.label_type1.(Intercept)
## bobyqa. 0.5448176 0.05518274
## Nelder_Mead. 0.5454502 0.05518476
## nloptwrap.NLOPT_LN_NELDERMEAD 0.5447853 0.05516976
## nloptwrap.NLOPT_LN_BOBYQA 0.5447853 0.05516976
## pp.food_type1.(Intercept)
## bobyqa. -0.03566596
## Nelder_Mead. -0.03573160
## nloptwrap.NLOPT_LN_NELDERMEAD -0.03580253
## nloptwrap.NLOPT_LN_BOBYQA -0.03580253
## pp.label_type1:food_type1.(Intercept)
## bobyqa. -0.001472889
## Nelder_Mead. -0.001280523
## nloptwrap.NLOPT_LN_NELDERMEAD -0.001312592
## nloptwrap.NLOPT_LN_BOBYQA -0.001312592
## pp.label_type1 pp.food_type1.label_type1
## bobyqa. 0.2260011 0.03880775
## Nelder_Mead. 0.2262993 0.03891334
## nloptwrap.NLOPT_LN_NELDERMEAD 0.2260459 0.03877229
## nloptwrap.NLOPT_LN_BOBYQA 0.2260459 0.03877229
## pp.label_type1:food_type1.label_type1
## bobyqa. -0.006057891
## Nelder_Mead. -0.006165599
## nloptwrap.NLOPT_LN_NELDERMEAD -0.006034952
## nloptwrap.NLOPT_LN_BOBYQA -0.006034952
## pp.food_type1
## bobyqa. 0.2674023
## Nelder_Mead. 0.2677056
## nloptwrap.NLOPT_LN_NELDERMEAD 0.2674238
## nloptwrap.NLOPT_LN_BOBYQA 0.2674238
## pp.label_type1:food_type1.food_type1
## bobyqa. -0.001787118
## Nelder_Mead. -0.001464064
## nloptwrap.NLOPT_LN_NELDERMEAD -0.001843422
## nloptwrap.NLOPT_LN_BOBYQA -0.001843422
## pp.label_type1:food_type1 food.(Intercept)
## bobyqa. 1.143763e-08 0.2288724
## Nelder_Mead. 1.098753e-02 0.2296837
## nloptwrap.NLOPT_LN_NELDERMEAD 1.044228e-03 0.2289632
## nloptwrap.NLOPT_LN_BOBYQA 1.044228e-03 0.2289632
## food.label_type1.(Intercept)
## bobyqa. 0.001444172
## Nelder_Mead. 0.001186154
## nloptwrap.NLOPT_LN_NELDERMEAD 0.001188185
## nloptwrap.NLOPT_LN_BOBYQA 0.001188185
## food.label_type1
## bobyqa. 0.08877141
## Nelder_Mead. 0.08934733
## nloptwrap.NLOPT_LN_NELDERMEAD 0.08875742
## nloptwrap.NLOPT_LN_BOBYQA 0.08875742
The model diagnostics look good, similar to the simulation model. No particular participant stands out as an outlier, maybe pp_59. As for foods, veggie_burger sticks out quite clearly. I’ll check whether the effects are robust to the exclusion of those two potential outliers.
# inspect standardized residuals
# NOTE(review): the diagnostics below are run on h2_attr, the initial fit
# that produced a convergence warning, rather than on h2_attr.3 (the
# restarted fit). Presumably fine because the estimates were essentially
# identical across optimizers, but confirm this was intentional.
densityplot(resid(h2_attr, scaled = TRUE))
# q-q plot (car::qqPlot also prints the row indices of the two most
# extreme residuals)
car::qqPlot(resid(h2_attr, scaled = TRUE))
## [1] 316 1141
# proportion of residuals in the extremes of the distribution
lmer_residuals(h2_attr)
## # A tibble: 3 x 4
##   standardized_cutoff proportion_cutoff proportion below_cutoff
##                 <dbl>             <dbl>      <dbl> <lgl>
## 1                 2              0.05     0.0358   TRUE
## 2                 2.5            0.01     0.00828  TRUE
## 3                 3              0.0001   0.000602 FALSE
# obtain outliers
h2_attr_outliers_pp <- influence.ME::influence(h2_attr, c("pp"))
h2_attr_outliers_food <- influence.ME::influence(h2_attr, c("food"))
# plot formal outliers for pp
plot(h2_attr_outliers_pp, which = 'cook')
plot(h2_attr_outliers_pp, which = 'dfbetas')[1]
plot(h2_attr_outliers_pp, which = 'dfbetas')[2]
plot(h2_attr_outliers_pp, which = 'dfbetas')[3]
plot(h2_attr_outliers_pp, which = 'dfbetas')[4]
# which ones are the highest
arrange_cook(h2_attr_outliers_pp, "pp")
## # A tibble: 6 x 2
## pp cooks_distance
## <chr> <dbl>
## 1 pp_59 0.0134
## 2 pp_181 0.00940
## 3 pp_41 0.00909
## 4 pp_154 0.00887
## 5 pp_115 0.00744
## 6 pp_129 0.00730
# plot formal outliers for food
plot(h2_attr_outliers_food, which = 'cook')
plot(h2_attr_outliers_food, which = 'dfbetas')[1]
plot(h2_attr_outliers_food, which = 'dfbetas')[2]
plot(h2_attr_outliers_food, which = 'dfbetas')[3]
plot(h2_attr_outliers_food, which = 'dfbetas')[4]
# which ones are so far out on cook's distance?
arrange_cook(h2_attr_outliers_food, "food")
## # A tibble: 6 x 2
## food cooks_distance
## <chr> <dbl>
## 1 veggie_burger 0.207
## 2 spring_rolls 0.105
## 3 noddles_veggies 0.0890
## 4 steam_bags 0.0488
## 5 salsa_pizza 0.0315
## 6 vegan_fillets 0.0303
# plot the fitted vs. the residuals (to check for homo/heteroskedasticity) with added smoothed line.
plot(h2_attr, type = c('p', 'smooth'))
As expected, the two main effects are significant, but the interaction is not.
# get p-value
h2_attr_p <-
mixed(
rating ~
label_type * food_type +
(1 + label_type * food_type | pp) +
(1 + label_type | food),
attractiveness,
type = 3,
method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h2_attr_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ label_type * food_type + (1 + label_type * food_type |
## Model: pp) + (1 + label_type | food)
## Data: attractiveness
## num Df den Df F Pr(>F)
## label_type 1 48.821 10.2553 0.0024 **
## food_type 1 60.109 30.6388 7.194e-07 ***
## label_type:food_type 1 37.648 0.0653 0.7998
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# approximate effect size
r.squaredGLMM(h2_attr)
## R2m R2c
## [1,] 0.05304371 0.3240482
Once more, I run several robustness checks. I begin by including the eight people with a high RSI. The results don’t change.
# get p-value for simulations
h2_sim_p_rsi <-
mixed(
rating ~
label_type * food_type +
(1 + label_type * food_type | pp) +
(1 + label_type | food),
data = full_join(
simulations,
high_rsi %>%
filter(measure == "simulations")
),
type = 3,
method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h2_sim_p_rsi)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ label_type * food_type + (1 + label_type * food_type |
## Model: pp) + (1 + label_type | food)
## Data: full_join
## Data: simulations
## Data: high_rsi %>% filter(measure == "simulations")
## num Df den Df F Pr(>F)
## label_type 1 101.726 34.7022 5.004e-08 ***
## food_type 1 61.898 36.7000 8.923e-08 ***
## label_type:food_type 1 36.251 1.1463 0.2914
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# get p-value for attractiveness
h2_attr_p_rsi <-
mixed(
rating ~
label_type * food_type +
(1 + label_type * food_type | pp) +
(1 + label_type | food),
data = full_join(
attractiveness,
high_rsi %>%
filter(measure == "attractiveness")
),
type = 3,
method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h2_attr_p_rsi)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ label_type * food_type + (1 + label_type * food_type |
## Model: pp) + (1 + label_type | food)
## Data: full_join
## Data: attractiveness
## Data: high_rsi %>% filter(measure == "attractiveness")
## num Df den Df F Pr(>F)
## label_type 1 48.822 10.2343 0.002423 **
## food_type 1 58.834 30.2673 8.594e-07 ***
## label_type:food_type 1 37.666 0.1282 0.722298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The effects on simulations are robust to the exclusion of outliers.
# get p-value
h2_sim_no_outliers <-
mixed(
rating ~
label_type * food_type +
(1 + label_type * food_type | pp) +
(1 + label_type | food),
simulations %>%
filter(
!pp %in% c("pp_91"),
food != "vegan_fillets"
),
type = 3,
method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h2_sim_no_outliers)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ label_type * food_type + (1 + label_type * food_type |
## Model: pp) + (1 + label_type | food)
## Data: %>%
## Data: simulations
## Data: filter(!pp %in% c("pp_91"), food != "vegan_fillets")
## num Df den Df F Pr(>F)
## label_type 1 86.609 38.4520 1.853e-08 ***
## food_type 1 62.669 38.0261 5.613e-08 ***
## label_type:food_type 1 35.623 1.8904 0.1777
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The same goes for effects on attractiveness: both main effects are robust to the exclusion of outliers.
# get p-value
h2_attr_no_outliers <-
mixed(
rating ~
label_type * food_type +
(1 + label_type * food_type | pp) +
(1 + label_type | food),
attractiveness %>%
filter(
!pp %in% c("pp_59"),
food != "veggie_burger"
),
type = 3,
method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h2_attr_no_outliers)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ label_type * food_type + (1 + label_type * food_type |
## Model: pp) + (1 + label_type | food)
## Data: %>%
## Data: attractiveness
## Data: filter(!pp %in% c("pp_59"), food != "veggie_burger")
## num Df den Df F Pr(>F)
## label_type 1 48.665 9.9206 0.002793 **
## food_type 1 57.807 28.7523 1.503e-06 ***
## label_type:food_type 1 36.915 0.1557 0.695462
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The preregistered hypothesis reads as follows: “Across foods, simulation ratings will predict attractiveness ratings.”
Currently, the working file follows tidyverse conventions, with one observation per row. That means each participant has two rows per trial: one for their rating on simulations, one for their rating on attractiveness. The variable measure shows to which measure the rating belongs.
To predict attractiveness from simulations, the ratings need to be on the same row, meaning a separate rating variable for both measures. So far, I split up working_file into a simulations file and an attractiveness file. I now combine those two to have both ratings per trial.
Unfortunately, this is not super-straightforward: Both measures were presented on their own page, meaning that all variables relating to time (e.g., page_median_time) will be different in the two data sets. We won’t need those for the merging, so I’ll remove any variables that aren’t identical across the two data sets (except for the ratings, of course). This is basically the same as spreading/pivoting working_file to wide format along the measure variable, but at this point I’ve done so many transformations etc. that removing and merging is easier.
# remove time variables from simulation data set
# (time-related columns differ between the two measures because each was
# presented on its own page, so they would break the merge)
sim_merge <-
simulations %>%
rename(simulation_rating = rating) %>%
select(
-measure,
-time,
-page_time_median,
-contains("speed_factor")
)
# same for attractiveness data set
attr_merge <-
attractiveness %>%
rename(attractiveness_rating = rating) %>%
select(
-measure,
-time,
-page_time_median,
-contains("speed_factor")
)
# merge the two
# NOTE(review): left_join() without an explicit `by =` performs a natural
# join on every shared column name. That is only safe if all remaining
# shared columns (pp, food, label_type, ...) are byte-identical across the
# two data sets — consider spelling out `by =` to make the join keys
# explicit and catch accidental mismatches.
wide_data <-
left_join(
sim_merge,
attr_merge
) %>%
select( # reorder variable names, purely cosmetic
pp:simulation_rating,
attractiveness_rating,
everything()
)
Next, we predict attractiveness with simulations, whilst controlling for label and food type. simulations is continuous, so I’ll center it. The model fails to converge.
# what's the raw correlation between the two ratings
# (pairwise.complete.obs drops trials where either rating is missing)
cor(wide_data$simulation_rating, wide_data$attractiveness_rating, use = "pairwise.complete.obs")
## [1] 0.5046872
# center predictor
# NOTE(review): scale() returns a one-column matrix, not a plain numeric
# vector. lmer() handles this, but it can surprise later dplyr/ggplot
# operations — consider wrapping in as.numeric().
wide_data <-
wide_data %>%
mutate(
simulation_rating_c = scale(simulation_rating, center = TRUE, scale = FALSE)
)
# sanity check: roughly balanced counts of trials per food x label cell
wide_data %>%
count(food, label_type)
## # A tibble: 80 x 3
## food label_type n
## <fct> <fct> <int>
## 1 chicken_balti control 84
## 2 chicken_balti enhanced 82
## 3 pepperoni_pizza control 82
## 4 pepperoni_pizza enhanced 84
## 5 empanadas control 82
## 6 empanadas enhanced 84
## 7 steak_pie control 82
## 8 steak_pie enhanced 84
## 9 pasta_bake control 84
## 10 pasta_bake enhanced 82
## # ... with 70 more rows
# set contrasts
options(contrasts = c("contr.sum", "contr.poly"))
# run the model
h3_main <-
lme4::lmer(
attractiveness_rating ~
simulation_rating_c + label_type + food_type +
(1 + simulation_rating_c + label_type + food_type | pp) +
(1 + simulation_rating_c + label_type | food),
wide_data
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.598793
## (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
Often, it can help to standardize continuous predictors. The model still throws a convergence warning, so I’ll continue with the same troubleshooting steps as previously.
# standardize the simulation rating (the predictor) — the original
# comment said "attractiveness rating", but the code standardizes
# simulation_rating; the outcome is left on its raw scale
wide_data <-
wide_data %>%
mutate(
simulation_rating_s = scale(simulation_rating, scale = TRUE)
)
# run the model with the standardized predictor
h3_main_s <-
lme4::lmer(
attractiveness_rating ~
simulation_rating_s + label_type + food_type +
(1 + simulation_rating_s + label_type + food_type | pp) +
(1 + simulation_rating_s + label_type | food),
wide_data
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0131649
## (tol = 0.002, component 1)
summary(h3_main_s)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## attractiveness_rating ~ simulation_rating_s + label_type + food_type +
## (1 + simulation_rating_s + label_type + food_type | pp) +
## (1 + simulation_rating_s + label_type | food)
## Data: wide_data
##
## REML criterion at convergence: 60541.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9463 -0.5759 0.0826 0.6397 3.8189
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 82.237 9.068
## simulation_rating_s 29.865 5.465 0.10
## label_type1 4.135 2.033 -0.38 0.57
## food_type1 28.101 5.301 -0.08 -0.08 -0.04
## food (Intercept) 17.020 4.126
## simulation_rating_s 1.955 1.398 -0.26
## label_type1 2.610 1.615 0.33 0.61
## Residual 471.768 21.720
## Number of obs: 6638, groups: pp, 166; food, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 54.7725 1.0052 54.489
## simulation_rating_s 12.3703 0.5971 20.716
## label_type1 -0.1942 0.4087 -0.475
## food_type1 3.9987 0.7925 5.046
##
## Correlation of Fixed Effects:
## (Intr) smlt__ lbl_t1
## smltn_rtng_ -0.019
## label_type1 0.033 0.365
## food_type1 -0.039 -0.073 -0.018
## convergence code: 0
## Model failed to converge with max|grad| = 0.0131649 (tol = 0.002, component 1)
Increase number of iterations. Still receive a warning.
# run the model
h3_main_s.2 <-
lme4::lmer(
attractiveness_rating ~
simulation_rating_s + label_type + food_type +
(1 + simulation_rating_s + label_type + food_type | pp) +
(1 + simulation_rating_s + label_type | food),
wide_data,
control = lmerControl(optCtrl = list(maxfun = 1e9))
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0131649
## (tol = 0.002, component 1)
summary(h3_main_s.2)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## attractiveness_rating ~ simulation_rating_s + label_type + food_type +
## (1 + simulation_rating_s + label_type + food_type | pp) +
## (1 + simulation_rating_s + label_type | food)
## Data: wide_data
## Control: lmerControl(optCtrl = list(maxfun = 1e+09))
##
## REML criterion at convergence: 60541.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9463 -0.5759 0.0826 0.6397 3.8189
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 82.237 9.068
## simulation_rating_s 29.865 5.465 0.10
## label_type1 4.135 2.033 -0.38 0.57
## food_type1 28.101 5.301 -0.08 -0.08 -0.04
## food (Intercept) 17.020 4.126
## simulation_rating_s 1.955 1.398 -0.26
## label_type1 2.610 1.615 0.33 0.61
## Residual 471.768 21.720
## Number of obs: 6638, groups: pp, 166; food, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 54.7725 1.0052 54.489
## simulation_rating_s 12.3703 0.5971 20.716
## label_type1 -0.1942 0.4087 -0.475
## food_type1 3.9987 0.7925 5.046
##
## Correlation of Fixed Effects:
## (Intr) smlt__ lbl_t1
## smltn_rtng_ -0.019
## label_type1 0.033 0.365
## food_type1 -0.039 -0.073 -0.018
## convergence code: 0
## Model failed to converge with max|grad| = 0.0131649 (tol = 0.002, component 1)
Restart from previous fit. Same warning.
# run the model
starting_point <- getME(h3_main_s.2, c("theta", "fixef"))
h3_main_s.3 <- update(h3_main_s.2,
start = starting_point,
control = lmerControl(optCtrl = list(maxfun = 1e9)))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00572005
## (tol = 0.002, component 1)
summary(h3_main_s.3)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## attractiveness_rating ~ simulation_rating_s + label_type + food_type +
## (1 + simulation_rating_s + label_type + food_type | pp) +
## (1 + simulation_rating_s + label_type | food)
## Data: wide_data
## Control: lmerControl(optCtrl = list(maxfun = 1e+09))
##
## REML criterion at convergence: 60541.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9461 -0.5759 0.0823 0.6397 3.8187
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 82.289 9.071
## simulation_rating_s 29.879 5.466 0.10
## label_type1 4.139 2.034 -0.38 0.57
## food_type1 28.091 5.300 -0.08 -0.08 -0.04
## food (Intercept) 17.024 4.126
## simulation_rating_s 1.948 1.396 -0.26
## label_type1 2.605 1.614 0.33 0.61
## Residual 471.760 21.720
## Number of obs: 6638, groups: pp, 166; food, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 54.7726 1.0054 54.478
## simulation_rating_s 12.3703 0.5971 20.718
## label_type1 -0.1943 0.4086 -0.475
## food_type1 3.9991 0.7924 5.047
##
## Correlation of Fixed Effects:
## (Intr) smlt__ lbl_t1
## smltn_rtng_ -0.019
## label_type1 0.032 0.365
## food_type1 -0.039 -0.073 -0.018
## convergence code: 0
## Model failed to converge with max|grad| = 0.00572005 (tol = 0.002, component 1)
Like previously, I check whether the estimates are consistent across optimizers. Once again, the estimates are identical across optimizers.
h3_main_model_s_all <- afex::all_fit(h3_main_s.3,
maxfun = 1e9)
## bobyqa. : [OK]
## Nelder_Mead. : [OK]
## optimx.nlminb : [ERROR]
## optimx.L-BFGS-B : [ERROR]
## nloptwrap.NLOPT_LN_NELDERMEAD :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00572005
## (tol = 0.002, component 1)
## [OK]
## nloptwrap.NLOPT_LN_BOBYQA :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00572005
## (tol = 0.002, component 1)
## [OK]
## nmkbw. : [ERROR]
# which ones converged
ok_fits <- sapply(h3_main_model_s_all, is, "merMod")
ok_fits
## bobyqa. Nelder_Mead.
## TRUE TRUE
## optimx.nlminb optimx.L-BFGS-B
## FALSE FALSE
## nloptwrap.NLOPT_LN_NELDERMEAD nloptwrap.NLOPT_LN_BOBYQA
## TRUE TRUE
## nmkbw.
## FALSE
# was fit okay?
!sapply(h3_main_model_s_all, inherits, "try-error")
## bobyqa. Nelder_Mead.
## TRUE TRUE
## optimx.nlminb optimx.L-BFGS-B
## TRUE TRUE
## nloptwrap.NLOPT_LN_NELDERMEAD nloptwrap.NLOPT_LN_BOBYQA
## TRUE TRUE
## nmkbw.
## TRUE
# compare fixed effects
ok_fits_details <- h3_main_model_s_all[ok_fits]
t(sapply(ok_fits_details, "fixef"))
## (Intercept) simulation_rating_s label_type1
## bobyqa. 54.77261 12.37032 -0.1942594
## Nelder_Mead. 54.77261 12.37033 -0.1942573
## nloptwrap.NLOPT_LN_NELDERMEAD 54.77259 12.37034 -0.1942530
## nloptwrap.NLOPT_LN_BOBYQA 54.77259 12.37034 -0.1942530
## food_type1
## bobyqa. 3.999129
## Nelder_Mead. 3.999130
## nloptwrap.NLOPT_LN_NELDERMEAD 3.999085
## nloptwrap.NLOPT_LN_BOBYQA 3.999085
# compare random effects
t(sapply(ok_fits_details, getME, "theta"))
## pp.(Intercept)
## bobyqa. 0.4176410
## Nelder_Mead. 0.4176448
## nloptwrap.NLOPT_LN_NELDERMEAD 0.4176480
## nloptwrap.NLOPT_LN_BOBYQA 0.4176480
## pp.simulation_rating_s.(Intercept)
## bobyqa. 0.02396929
## Nelder_Mead. 0.02397201
## nloptwrap.NLOPT_LN_NELDERMEAD 0.02398978
## nloptwrap.NLOPT_LN_BOBYQA 0.02398978
## pp.label_type1.(Intercept)
## bobyqa. -0.03580145
## Nelder_Mead. -0.03580073
## nloptwrap.NLOPT_LN_NELDERMEAD -0.03580506
## nloptwrap.NLOPT_LN_BOBYQA -0.03580506
## pp.food_type1.(Intercept)
## bobyqa. -0.01897510
## Nelder_Mead. -0.01897814
## nloptwrap.NLOPT_LN_NELDERMEAD -0.01894625
## nloptwrap.NLOPT_LN_BOBYQA -0.01894625
## pp.simulation_rating_s
## bobyqa. 0.2505218
## Nelder_Mead. 0.2505221
## nloptwrap.NLOPT_LN_NELDERMEAD 0.2505170
## nloptwrap.NLOPT_LN_BOBYQA 0.2505170
## pp.label_type1.simulation_rating_s
## bobyqa. 0.05688584
## Nelder_Mead. 0.05688757
## nloptwrap.NLOPT_LN_NELDERMEAD 0.05687374
## nloptwrap.NLOPT_LN_BOBYQA 0.05687374
## pp.food_type1.simulation_rating_s
## bobyqa. -0.01795389
## Nelder_Mead. -0.01795366
## nloptwrap.NLOPT_LN_NELDERMEAD -0.01807340
## nloptwrap.NLOPT_LN_BOBYQA -0.01807340
## pp.label_type1 pp.food_type1.label_type1
## bobyqa. 0.06523339 -0.008715632
## Nelder_Mead. 0.06523336 -0.008717828
## nloptwrap.NLOPT_LN_NELDERMEAD 0.06524478 -0.008691725
## nloptwrap.NLOPT_LN_BOBYQA 0.06524478 -0.008691725
## pp.food_type1 food.(Intercept)
## bobyqa. 0.2424429 0.1899555
## Nelder_Mead. 0.2424367 0.1899572
## nloptwrap.NLOPT_LN_NELDERMEAD 0.2424520 0.1899649
## nloptwrap.NLOPT_LN_BOBYQA 0.2424520 0.1899649
## food.simulation_rating_s.(Intercept)
## bobyqa. -0.01689705
## Nelder_Mead. -0.01689668
## nloptwrap.NLOPT_LN_NELDERMEAD -0.01689815
## nloptwrap.NLOPT_LN_BOBYQA -0.01689815
## food.label_type1.(Intercept)
## bobyqa. 0.02422455
## Nelder_Mead. 0.02422853
## nloptwrap.NLOPT_LN_NELDERMEAD 0.02421897
## nloptwrap.NLOPT_LN_BOBYQA 0.02421897
## food.simulation_rating_s
## bobyqa. 0.06199573
## Nelder_Mead. 0.06199344
## nloptwrap.NLOPT_LN_NELDERMEAD 0.06199703
## nloptwrap.NLOPT_LN_BOBYQA 0.06199703
## food.label_type1.simulation_rating_s
## bobyqa. 0.05362565
## Nelder_Mead. 0.05362015
## nloptwrap.NLOPT_LN_NELDERMEAD 0.05357641
## nloptwrap.NLOPT_LN_BOBYQA 0.05357641
## food.label_type1
## bobyqa. 0.04542658
## Nelder_Mead. 0.04542826
## nloptwrap.NLOPT_LN_NELDERMEAD 0.04544769
## nloptwrap.NLOPT_LN_BOBYQA 0.04544769
Next, I inspect the model diagnostics. pp_59 could be an outlier. As for foods, spring_rolls and veggie_burger are potential outliers.
# inspect standardized residuals
# NOTE(review): the diagnostics below use h3_main — the model with the
# merely centered predictor, which triggered a "nearly unidentifiable"
# warning — rather than h3_main_s.3, the standardized model whose
# estimates are reported above. Confirm this was intentional; running
# diagnostics on h3_main_s.3 would match the reported model.
densityplot(resid(h3_main, scaled = TRUE))
# q-q plot (prints the indices of the two most extreme residuals)
car::qqPlot(resid(h3_main, scaled = TRUE))
##  316 5213
##  316 5212
# proportion of residuals in the extremes of the distribution
lmer_residuals(h3_main)
## # A tibble: 3 x 4
##   standardized_cutoff proportion_cutoff proportion below_cutoff
##                 <dbl>             <dbl>      <dbl> <lgl>
## 1                 2              0.05     0.0461   TRUE
## 2                 2.5            0.01     0.0140   FALSE
## 3                 3              0.0001   0.00286  FALSE
# obtain outliers
h3_main_outliers_pp <- influence.ME::influence(h3_main, c("pp"))
h3_main_outliers_food <- influence.ME::influence(h3_main, c("food"))
# plot formal outliers for pp
plot(h3_main_outliers_pp, which = 'cook')
plot(h3_main_outliers_pp, which = 'dfbetas')[1]
plot(h3_main_outliers_pp, which = 'dfbetas')[2]
plot(h3_main_outliers_pp, which = 'dfbetas')[3]
plot(h3_main_outliers_pp, which = 'dfbetas')[4]
# which ones are the highest
arrange_cook(h3_main_outliers_pp, "pp")
## # A tibble: 6 x 2
## pp cooks_distance
## <chr> <dbl>
## 1 pp_59 0.0254
## 2 pp_126 0.0187
## 3 pp_39 0.0169
## 4 pp_66 0.0152
## 5 pp_181 0.0138
## 6 pp_12 0.0135
# plot formal outliers for food
plot(h3_main_outliers_food, which = 'cook')
plot(h3_main_outliers_food, which = 'dfbetas')[1]
plot(h3_main_outliers_food, which = 'dfbetas')[2]
plot(h3_main_outliers_food, which = 'dfbetas')[3]
plot(h3_main_outliers_food, which = 'dfbetas')[4]
# which ones are so far out on cook's distance?
arrange_cook(h3_main_outliers_food, "food")
## # A tibble: 6 x 2
## food cooks_distance
## <chr> <dbl>
## 1 spring_rolls 0.128
## 2 veggie_burger 0.115
## 3 pepperoni_pizza 0.0792
## 4 noddles_veggies 0.0584
## 5 steam_bags 0.0425
## 6 chicken_breast 0.0391
# plot the fitted vs. the residuals (to check for homo/heteroskedasticity) with added smoothed line.
plot(h3_main, type = c('p', 'smooth'))
Once we include simulations as predictor, labels don’t have an effect on attractiveness anymore. This can have several explanations. Simulation and attractiveness ratings are highly correlated, meaning that simulations explain so much variance that there is little left to explain for the label type. It could also be an issue of multicollinearity. It is also possible that simulations mediate the effect of label type. However, attractiveness was assessed before simulations, ruling out causal mediation.
# get p-value
h3_main_p <-
mixed(
attractiveness_rating ~
simulation_rating_s + label_type + food_type +
(1 + simulation_rating_s + label_type + food_type | pp) +
(1 + simulation_rating_s + label_type | food),
wide_data,
type = 3,
method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
summary(h3_main_p)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## attractiveness_rating ~ simulation_rating_s + label_type + food_type +
## (1 + simulation_rating_s + label_type + food_type | pp) +
## (1 + simulation_rating_s + label_type | food)
## Data: data
##
## REML criterion at convergence: 60541.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9463 -0.5759 0.0826 0.6397 3.8189
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 82.237 9.068
## simulation_rating_s 29.865 5.465 0.10
## label_type1 4.135 2.033 -0.38 0.57
## food_type1 28.101 5.301 -0.08 -0.08 -0.04
## food (Intercept) 17.020 4.126
## simulation_rating_s 1.955 1.398 -0.26
## label_type1 2.610 1.615 0.33 0.61
## Residual 471.768 21.720
## Number of obs: 6638, groups: pp, 166; food, 40
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 54.7725 1.0052 115.1940 54.489 < 2e-16 ***
## simulation_rating_s 12.3703 0.5971 130.5529 20.716 < 2e-16 ***
## label_type1 -0.1942 0.4087 47.3314 -0.475 0.637
## food_type1 3.9987 0.7925 68.2239 5.046 3.57e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) smlt__ lbl_t1
## smltn_rtng_ -0.019
## label_type1 0.033 0.365
## food_type1 -0.039 -0.073 -0.018
## convergence code: 0
## Model failed to converge with max|grad| = 0.0131649 (tol = 0.002, component 1)
# approximate effect size
r.squaredGLMM(h3_main)
## R2m R2c
## [1,] 0.2310311 0.4287556
Once more, I run several robustness checks. I begin by including the eight people with a high RSI. For that, I need to merge the high-RSI cases with the analysis file. The results don’t change.
# create high rsi file for merging: drop the page-time variables (they
# differ between measures), pivot to wide format, and rename the rating
# columns to match wide_data
rsi_merge <-
  high_rsi %>%
  select( # remove the same time variables
    -time,
    -page_time_median,
    -contains("speed_factor")
  ) %>%
  pivot_wider( # turn into wide format
    names_from = measure,
    values_from = rating
  ) %>%
  rename( # same names as in data_wide
    attractiveness_rating = attractiveness,
    simulation_rating = simulations
  )
# get p-value with the high-RSI participants included
# BUG FIX: the original call joined the raw (long-format) high_rsi object
# with left_join(). A left join keeps only rows whose keys already exist
# in wide_data, so the eight high-RSI participants were never added and
# the "robustness check" re-ran the identical data — the ANOVA table it
# produced matches h3_main_p exactly (F = t^2 for every term). Use
# full_join() with the wide-format rsi_merge built above (which is why it
# exists), mirroring the h2 robustness checks, so their trials are
# actually included. simulation_rating_s is re-derived after the join so
# the standardization covers the full sample.
h3_main_p_rsi <-
  mixed(
    attractiveness_rating ~
      simulation_rating_s + label_type + food_type +
      (1 + simulation_rating_s + label_type + food_type | pp) +
      (1 + simulation_rating_s + label_type | food),
    data = full_join(
      wide_data %>%
        select(
          pp:rsi # so we have the same columns in both data sets, didn't do transformations (centering etc.) for high_rsi data set
        ),
      rsi_merge
    ) %>%
      mutate(
        simulation_rating_s = scale(simulation_rating, scale = TRUE)
      ),
    type = 3,
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h3_main_p_rsi)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: attractiveness_rating ~ simulation_rating_s + label_type + food_type +
## Model: (1 + simulation_rating_s + label_type + food_type | pp) +
## Model: (1 + simulation_rating_s + label_type | food)
## Data: %>%
## Data: left_join(wide_data %>% select(pp:rsi), high_rsi)
## Data: mutate(simulation_rating_s = scale(simulation_rating, scale = TRUE))
## num Df den Df F Pr(>F)
## simulation_rating_s 1 130.553 429.1439 < 2.2e-16 ***
## label_type 1 47.331 0.2259 0.6368
## food_type 1 68.224 25.4600 3.571e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The effects on simulations are robust to the exclusion of outliers.
# get p-value
# Robustness check: refit the H3 model excluding the influential participant
# and the two influential foods identified in the outlier diagnostics.
h3_main_no_outliers <-
mixed(
attractiveness_rating ~
simulation_rating_s + label_type + food_type +
(1 + simulation_rating_s + label_type + food_type | pp) +
(1 + simulation_rating_s + label_type | food),
wide_data %>%
filter(
!pp %in% c("pp_59"),
!food %in% c("spring_rolls", "veggie_burger")
),
type = 3,
method = "S" # Satterthwaite degrees of freedom
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h3_main_no_outliers)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: attractiveness_rating ~ simulation_rating_s + label_type + food_type +
## Model: (1 + simulation_rating_s + label_type + food_type | pp) +
## Model: (1 + simulation_rating_s + label_type | food)
## Data: %>%
## Data: wide_data
## Data: filter(!pp %in% c("pp_59"), !food %in% c("spring_rolls", "veggie_burger"))
## num Df den Df F Pr(>F)
## simulation_rating_s 1 127.311 414.6296 < 2.2e-16 ***
## label_type 1 51.375 0.0271 0.8698
## food_type 1 76.167 35.8727 6.499e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Next, we want to know whether simulation ratings interact with either of the factors. I begin by adding an interaction term of simulation_rating_s and label_type. Actually, we don’t even need to fit a model: the two slopes are almost identical (although this doesn’t take the nested nature into account), which is backed up by the small estimate and the non-significant p-value.
# Visualize the simulation x label interaction:
# one linear trend of attractiveness on simulation ratings per label type.
ggplot(
data = wide_data,
mapping = aes(x = simulation_rating_s, y = attractiveness_rating)
) +
geom_smooth(mapping = aes(color = label_type), method = "lm")
# set contrasts
# Sum-to-zero contrasts so Type 3 tests are interpretable.
options(contrasts = c("contr.sum", "contr.poly"))
# run the model
# Adds the simulation_rating_s x label_type interaction to the fixed and
# random parts of the H3 model.
h3_interaction_label_type <-
mixed(
attractiveness_rating ~
simulation_rating_s*label_type + food_type +
(1 + simulation_rating_s*label_type + food_type | pp) +
(1 + simulation_rating_s*label_type | food),
wide_data,
type = 3,
method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# Full summary of the label-type interaction model (fixed + random effects).
summary(h3_interaction_label_type)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## attractiveness_rating ~ simulation_rating_s * label_type + food_type +
## (1 + simulation_rating_s * label_type + food_type | pp) +
## (1 + simulation_rating_s * label_type | food)
## Data: data
##
## REML criterion at convergence: 60531.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9158 -0.5767 0.0802 0.6392 3.7989
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 84.4495 9.1896
## simulation_rating_s 30.3198 5.5063 0.08
## label_type1 2.2216 1.4905 -0.53 0.80
## food_type1 27.9621 5.2879 -0.07 -0.07
## simulation_rating_s:label_type1 1.7575 1.3257 0.36 -0.43
## food (Intercept) 17.3046 4.1599
## simulation_rating_s 2.0992 1.4489 -0.24
## label_type1 3.1256 1.7679 0.35 0.54
## simulation_rating_s:label_type1 0.6853 0.8278 0.19 0.42
## Residual 470.9667 21.7018
##
##
##
##
## 0.03
## -0.53 0.24
##
##
##
## 0.87
##
## Number of obs: 6638, groups: pp, 166; food, 40
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 54.8173 1.0171 114.7815 53.895
## simulation_rating_s 12.3134 0.6034 128.7992 20.407
## label_type1 -0.2526 0.4132 42.8465 -0.611
## food_type1 4.0344 0.7930 68.0733 5.088
## simulation_rating_s:label_type1 0.3558 0.3365 43.9662 1.057
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## simulation_rating_s < 2e-16 ***
## label_type1 0.544
## food_type1 3.05e-06 ***
## simulation_rating_s:label_type1 0.296
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) smlt__ lbl_t1 fd_ty1
## smltn_rtng_ -0.029
## label_type1 0.048 0.373
## food_type1 -0.036 -0.069 -0.004
## smltn_r_:_1 0.174 -0.087 0.170 0.069
## convergence code: 0
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
# Type 3 F-tests for the label-type interaction model
anova(h3_interaction_label_type)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: attractiveness_rating ~ simulation_rating_s * label_type + food_type +
## Model: (1 + simulation_rating_s * label_type + food_type | pp) +
## Model: (1 + simulation_rating_s * label_type | food)
## Data: wide_data
## num Df den Df F Pr(>F)
## simulation_rating_s 1 128.799 416.4533 < 2.2e-16 ***
## label_type 1 42.846 0.3736 0.5443
## food_type 1 68.073 25.8858 3.054e-06 ***
## simulation_rating_s:label_type 1 43.966 1.1177 0.2962
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
For food type, the plot shows the two main effects, but no interaction. This is backed up by the model.
# Visualize the simulation x food type interaction:
# one linear trend of attractiveness on simulation ratings per food type.
ggplot(
data = wide_data,
mapping = aes(x = simulation_rating_s, y = attractiveness_rating)
) +
geom_smooth(mapping = aes(color = food_type), method = "lm")
# set contrasts
# Sum-to-zero contrasts so Type 3 tests are interpretable.
options(contrasts = c("contr.sum", "contr.poly"))
# run the model
# Interaction of simulations with food type; note the simpler random
# structure for food (no food_type slope: food type is between foods).
h3_interaction_food_type <-
mixed(
attractiveness_rating ~
simulation_rating_s*food_type + label_type +
(1 + simulation_rating_s*food_type + label_type | pp) +
(1 + simulation_rating_s | food),
wide_data,
type = 3,
method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# Full summary of the food-type interaction model.
summary(h3_interaction_food_type)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## attractiveness_rating ~ simulation_rating_s * food_type + label_type +
## (1 + simulation_rating_s * food_type + label_type | pp) +
## (1 + simulation_rating_s | food)
## Data: data
##
## REML criterion at convergence: 60535.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.0041 -0.5849 0.0733 0.6389 3.6820
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 84.129 9.172
## simulation_rating_s 30.488 5.522 0.08
## food_type1 27.616 5.255 -0.13 -0.05
## label_type1 4.028 2.007 -0.38 0.56
## simulation_rating_s:food_type1 2.059 1.435 -0.41 0.53
## food (Intercept) 16.253 4.032
## simulation_rating_s 1.279 1.131 -0.35
## Residual 472.981 21.748
##
##
##
##
## -0.01
## 0.73 0.39
##
##
##
## Number of obs: 6638, groups: pp, 166; food, 40
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 54.5797 1.0035 120.0325 54.391
## simulation_rating_s 12.4766 0.5858 131.4600 21.298
## food_type1 3.7513 0.8089 65.0207 4.638
## label_type1 -0.1728 0.3182 155.7765 -0.543
## simulation_rating_s:food_type1 0.4745 0.3813 60.6047 1.244
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## simulation_rating_s < 2e-16 ***
## food_type1 1.75e-05 ***
## label_type1 0.588
## simulation_rating_s:food_type1 0.218
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) smlt__ fd_ty1 lbl_t1
## smltn_rtng_ -0.031
## food_type1 -0.044 -0.073
## label_type1 -0.126 0.289 -0.013
## smltn_r_:_1 -0.148 0.157 -0.023 0.049
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# Type 3 F-tests for the food-type interaction model
anova(h3_interaction_food_type)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: attractiveness_rating ~ simulation_rating_s * food_type + label_type +
## Model: (1 + simulation_rating_s * food_type + label_type | pp) +
## Model: (1 + simulation_rating_s | food)
## Data: wide_data
## num Df den Df F Pr(>F)
## simulation_rating_s 1 131.460 453.6241 < 2.2e-16 ***
## food_type 1 65.021 21.5095 1.75e-05 ***
## label_type 1 155.776 0.2949 0.5879
## simulation_rating_s:food_type 1 60.605 1.5487 0.2181
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The preregistered hypothesis reads as follows: >The intention to reduce meat will be associated positively with attractiveness ratings for the plant-based foods, and negatively with desire (sic: simulations) for meat-based foods.
For this hypothesis, I run two interaction models, predicting attractiveness and simulations from the interaction of food_type and intention to reduce meat eating (change_diet).
Indeed, there is a positive raw relation between the intention to reduce eating meat and attractiveness ratings for plant-based foods. For meat-based foods, this relation looks flat. The model throws a convergence error, but that disappears when using the bobyqa optimizer.
# standardize intention to eat meat
# Add a z-scored version of change_diet (intention to reduce meat eating).
attractiveness <-
attractiveness %>%
mutate(
change_diet_s = scale(change_diet, scale = TRUE)
)
# plot
# Raw relation between intention to reduce meat and attractiveness,
# one regression line per food type; y-axis fixed to the 0-100 rating scale.
ggplot(attractiveness,
aes(
x = change_diet_s,
y = rating
)
)+
geom_smooth(method = "lm", aes(color = food_type)) +
ylim(0, 100)
# set contrasts
options(contrasts = c("contr.sum", "contr.poly"))
# construct model
# H4 (attractiveness): intention to reduce meat x food type, with by-pp
# food_type slopes and by-food intercepts. bobyqa avoids the convergence
# warning the default optimizer throws for this model.
h4_attr <- lme4::lmer(
rating ~
change_diet_s * food_type +
(1 + food_type | pp) +
(1 | food),
attractiveness,
control = lmerControl(optimizer = "bobyqa")
)
# inspect model
summary(h4_attr)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ change_diet_s * food_type + (1 + food_type | pp) + (1 |
## food)
## Data: attractiveness
## Control: lmerControl(optimizer = "bobyqa")
##
## REML criterion at convergence: 62019
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2246 -0.6922 0.1051 0.7014 3.2292
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 123.80 11.127
## food_type1 44.62 6.680 -0.01
## food (Intercept) 40.50 6.364
## Residual 601.45 24.525
## Number of obs: 6640, groups: pp, 166; food, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 54.7616 1.3598 40.273
## change_diet_s 2.5847 0.9146 2.826
## food_type1 6.5694 1.1713 5.609
## change_diet_s:food_type1 -2.4460 0.5995 -4.080
##
## Correlation of Fixed Effects:
## (Intr) chng__ fd_ty1
## change_dt_s 0.000
## food_type1 -0.004 0.000
## chng_dt_:_1 0.000 -0.011 0.000
pp_59 and pp_129 are potential influential cases. There are two potential outliers among the foods: haddock and spring_rolls.
# Model diagnostics for h4_attr: residual distribution, extreme-residual
# proportions, and influence (Cook's distance / dfbetas) for pp and food.
# lmer_residuals() and arrange_cook() are custom helpers defined earlier
# in this script.
# inspect standardized residuals
densityplot(resid(h4_attr, scaled = TRUE))
# q-q plot
car::qqPlot(resid(h4_attr, scaled = TRUE))
## [1] 5497 316
# proportion of residuals in the extremes of the distribution
lmer_residuals(h4_attr)
## # A tibble: 3 x 4
## standardized_cutoff proportion_cutoff proportion below_cutoff
## <dbl> <dbl> <dbl> <lgl>
## 1 2 0.05 0.0370 TRUE
## 2 2.5 0.01 0.00783 TRUE
## 3 3 0.0001 0.000452 FALSE
# obtain outliers
# influence.ME refits the model leaving out each pp / each food in turn.
h4_attr_outliers_pp <- influence.ME::influence(h4_attr, c("pp"))
h4_attr_outliers_food <- influence.ME::influence(h4_attr, c("food"))
# plot formal outliers for pp
plot(h4_attr_outliers_pp, which = 'cook')
plot(h4_attr_outliers_pp, which = 'dfbetas')[1]
plot(h4_attr_outliers_pp, which = 'dfbetas')[2]
plot(h4_attr_outliers_pp, which = 'dfbetas')[3]
plot(h4_attr_outliers_pp, which = 'dfbetas')[4]
# which ones are the highest
arrange_cook(h4_attr_outliers_pp, "pp")
## # A tibble: 6 x 2
## pp cooks_distance
## <chr> <dbl>
## 1 pp_59 0.0529
## 2 pp_129 0.0432
## 3 pp_53 0.0382
## 4 pp_63 0.0275
## 5 pp_178 0.0264
## 6 pp_158 0.0233
# plot formal outliers for food
plot(h4_attr_outliers_food, which = 'cook')
plot(h4_attr_outliers_food, which = 'dfbetas')[1]
plot(h4_attr_outliers_food, which = 'dfbetas')[2]
plot(h4_attr_outliers_food, which = 'dfbetas')[3]
plot(h4_attr_outliers_food, which = 'dfbetas')[4]
# which ones are so far out on cook's distance?
arrange_cook(h4_attr_outliers_food, "food")
## # A tibble: 6 x 2
## food cooks_distance
## <chr> <dbl>
## 1 haddock 0.0666
## 2 spring_rolls 0.0666
## 3 salsa_pizza 0.0422
## 4 vegan_fillets 0.0336
## 5 sausoyges 0.0324
## 6 fishless_cakes 0.0242
# plot the fitted vs. the residuals (to check for homo/heteroskedasticity) with added smoothed line.
plot(h4_attr, type = c('p', 'smooth'))
Next, we see whether the interaction is significant. It is, so I also estimate the simple slopes, which show the expected stronger association for plant-based compared to meat-based foods.
# get p-value
# Same model as h4_attr, refit via afex::mixed to obtain Satterthwaite
# p-values for the Type 3 tests.
h4_attr_p <- mixed(
rating ~
change_diet_s * food_type +
(1 + food_type | pp) +
(1 | food),
attractiveness,
type = 3,
method = "S",
control = lmerControl(optimizer = "bobyqa")
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
summary(h4_attr_p)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: rating ~ change_diet_s * food_type + (1 + food_type | pp) + (1 |
## food)
## Data: data
## Control: lmerControl(optimizer = "bobyqa")
##
## REML criterion at convergence: 62019
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2246 -0.6922 0.1051 0.7014 3.2292
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 123.80 11.127
## food_type1 44.62 6.680 -0.01
## food (Intercept) 40.50 6.364
## Residual 601.45 24.525
## Number of obs: 6640, groups: pp, 166; food, 40
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 54.7616 1.3598 94.1965 40.273 < 2e-16 ***
## change_diet_s 2.5847 0.9146 164.0000 2.826 0.0053 **
## food_type1 6.5694 1.1713 57.3627 5.609 6.14e-07 ***
## change_diet_s:food_type1 -2.4460 0.5995 164.0001 -4.080 7.02e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) chng__ fd_ty1
## change_dt_s 0.000
## food_type1 -0.004 0.000
## chng_dt_:_1 0.000 -0.011 0.000
# simple slopes
# Slope of change_diet_s within each food type, plus their pairwise contrast.
emtrends(
h4_attr,
pairwise ~ food_type,
var = "change_diet_s"
)
## $emtrends
## food_type change_diet_s.trend SE df asymp.LCL asymp.UCL
## meat_based 0.139 1.09 Inf -1.99 2.27
## plant_based 5.031 1.10 Inf 2.88 7.19
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## meat_based - plant_based -4.89 1.2 Inf -4.080 <.0001
##
## Degrees-of-freedom method: asymptotic
# get approximate effect size
# Marginal and conditional R2 for the H4 attractiveness model.
r.squaredGLMM(h4_attr)
## R2m R2c
## [1,] 0.06445011 0.305648
In the simulation model, we see the opposite of the hypothesis: There is a stronger positive raw association for plant-based compared to meat-based foods, and meat-based foods evoke stronger simulations overall.
# standardize intention to eat meat
# Add the z-scored intention variable to the simulations data set as well.
simulations <-
simulations %>%
mutate(
change_diet_s = scale(change_diet, scale = TRUE)
)
# plot
# Raw relation between intention to reduce meat and simulation ratings,
# one regression line per food type.
ggplot(simulations,
aes(
x = change_diet_s,
y = rating
)
)+
geom_smooth(method = "lm", aes(color = food_type)) +
ylim(0, 100)
# set contrasts
options(contrasts = c("contr.sum", "contr.poly"))
# construct model
# H4 (simulations): same structure as h4_attr, fit on the simulations data.
h4_sim <- lme4::lmer(
rating ~
change_diet_s * food_type +
(1 + food_type | pp) +
(1 | food),
simulations
)
# inspect model
summary(h4_sim)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ change_diet_s * food_type + (1 + food_type | pp) + (1 |
## food)
## Data: simulations
##
## REML criterion at convergence: 61219.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4237 -0.6593 0.1398 0.6817 3.1020
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 140.85 11.868
## food_type1 34.15 5.844 -0.10
## food (Intercept) 25.64 5.064
## Residual 534.14 23.111
## Number of obs: 6638, groups: pp, 166; food, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 58.9887 1.2530 47.077
## change_diet_s 2.0796 0.9639 2.157
## food_type1 5.9118 0.9630 6.139
## change_diet_s:food_type1 -1.1978 0.5350 -2.239
##
## Correlation of Fixed Effects:
## (Intr) chng__ fd_ty1
## change_dt_s 0.000
## food_type1 -0.034 0.000
## chng_dt_:_1 0.000 -0.081 0.000
pp_103 is a potential influential case. There are three potential outliers among the foods: vegan_fillets, haddock, and salsa_pizza.
# Model diagnostics for h4_sim, mirroring the h4_attr checks: residual
# distribution, extreme-residual proportions, and influence diagnostics
# for participants and foods (custom helpers defined earlier).
# inspect standardized residuals
densityplot(resid(h4_sim, scaled = TRUE))
# q-q plot
car::qqPlot(resid(h4_sim, scaled = TRUE))
## [1] 3277 3469
# proportion of residuals in the extremes of the distribution
lmer_residuals(h4_sim)
## # A tibble: 3 x 4
## standardized_cutoff proportion_cutoff proportion below_cutoff
## <dbl> <dbl> <dbl> <lgl>
## 1 2 0.05 0.0404 TRUE
## 2 2.5 0.01 0.0105 FALSE
## 3 3 0.0001 0.00181 FALSE
# obtain outliers
h4_sim_outliers_pp <- influence.ME::influence(h4_sim, c("pp"))
h4_sim_outliers_food <- influence.ME::influence(h4_sim, c("food"))
# plot formal outliers for pp
plot(h4_sim_outliers_pp, which = 'cook')
plot(h4_sim_outliers_pp, which = 'dfbetas')[1]
plot(h4_sim_outliers_pp, which = 'dfbetas')[2]
plot(h4_sim_outliers_pp, which = 'dfbetas')[3]
plot(h4_sim_outliers_pp, which = 'dfbetas')[4]
# which ones are the highest
arrange_cook(h4_sim_outliers_pp, "pp")
## # A tibble: 6 x 2
## pp cooks_distance
## <chr> <dbl>
## 1 pp_103 0.0468
## 2 pp_105 0.0341
## 3 pp_88 0.0293
## 4 pp_158 0.0270
## 5 pp_12 0.0268
## 6 pp_129 0.0250
# plot formal outliers for food
plot(h4_sim_outliers_food, which = 'cook')
plot(h4_sim_outliers_food, which = 'dfbetas')[1]
plot(h4_sim_outliers_food, which = 'dfbetas')[2]
plot(h4_sim_outliers_food, which = 'dfbetas')[3]
plot(h4_sim_outliers_food, which = 'dfbetas')[4]
# which ones are so far out on cook's distance?
arrange_cook(h4_sim_outliers_food, "food")
## # A tibble: 6 x 2
## food cooks_distance
## <chr> <dbl>
## 1 vegan_fillets 0.0608
## 2 haddock 0.0597
## 3 salsa_pizza 0.0547
## 4 hash_browns 0.0306
## 5 pasta_bake 0.0251
## 6 spring_rolls 0.0247
# plot the fitted vs. the residuals (to check for homo/heteroskedasticity) with added smoothed line.
plot(h4_sim, type = c('p', 'smooth'))
Next, we see whether the interaction is significant. It is, but barely. I also estimate the simple slopes, which show a stronger association for plant-based compared to meat-based foods.
# get p-value
# Refit h4_sim via afex::mixed for Satterthwaite-based Type 3 tests.
h4_sim_p <- mixed(
rating ~
change_diet_s * food_type +
(1 + food_type | pp) +
(1 | food),
simulations,
type = 3,
method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h4_sim_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ change_diet_s * food_type + (1 + food_type | pp) + (1 |
## Model: food)
## Data: simulations
## num Df den Df F Pr(>F)
## change_diet_s 1 164.000 4.6545 0.03243 *
## food_type 1 60.545 37.6899 6.977e-08 ***
## change_diet_s:food_type 1 164.009 5.0125 0.02651 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# simple slopes
# Slope of change_diet_s within each food type for the simulations model.
emtrends(
h4_sim,
pairwise ~ food_type,
var = "change_diet_s"
)
## $emtrends
## food_type change_diet_s.trend SE df asymp.LCL asymp.UCL
## meat_based 0.882 1.06 Inf -1.20 2.97
## plant_based 3.277 1.14 Inf 1.04 5.51
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## meat_based - plant_based -2.4 1.07 Inf -2.239 0.0252
##
## Degrees-of-freedom method: asymptotic
# get approximate effect size
# Marginal and conditional R2 for the H4 simulations model.
r.squaredGLMM(h4_sim)
## R2m R2c
## [1,] 0.05250355 0.3112413
Just as before, I check whether the inclusion of those with a high RSI changes anything about the results. The results don’t change for attractiveness, but for simulations the main effect of the intention to reduce eating meat is no longer significant.
# get p-value for attractiveness
# Robustness check: add the high-RSI cases back into the attractiveness
# data (full_join) and refit the H4 model.
h4_attr_p_rsi <-
mixed(
rating ~
change_diet_s * food_type +
(1 + food_type | pp) +
(1 | food),
data = full_join( # add high rsi cases
attractiveness,
high_rsi %>%
filter(measure == "attractiveness") %>%
mutate(change_diet_s = scale(change_diet, scale = TRUE))
),
type = 3,
method = "S",
control = lmerControl(optimizer = "bobyqa")
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h4_attr_p_rsi)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ change_diet_s * food_type + (1 + food_type | pp) + (1 |
## Model: food)
## Data: full_join
## Data: attractiveness
## Data: high_rsi %>% filter(measure == "attractiveness") %>% mutate(change_diet_s = scale(change_diet, scale = TRUE))
## num Df den Df F Pr(>F)
## change_diet_s 1 172.000 9.4235 0.00249 **
## food_type 1 56.208 31.0611 7.366e-07 ***
## change_diet_s:food_type 1 172.000 17.2397 5.177e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# get p-value for simulations
# Same high-RSI robustness check for the simulations model.
h4_sim_p_rsi <-
mixed(
rating ~
change_diet_s * food_type +
(1 + food_type | pp) +
(1 | food),
data = full_join( # add high rsi cases
simulations,
high_rsi %>%
filter(measure == "simulations") %>%
mutate(change_diet_s = scale(change_diet, scale = TRUE))
),
type = 3,
method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h4_sim_p_rsi)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ change_diet_s * food_type + (1 + food_type | pp) + (1 |
## Model: food)
## Data: full_join
## Data: simulations
## Data: high_rsi %>% filter(measure == "simulations") %>% mutate(change_diet_s = scale(change_diet, scale = TRUE))
## num Df den Df F Pr(>F)
## change_diet_s 1 171.993 3.8364 0.05177 .
## food_type 1 60.271 37.0605 8.64e-08 ***
## change_diet_s:food_type 1 172.002 5.2849 0.02271 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
As a next step, I see whether the results are robust to the exclusion of the outliers I identified above. I start with the outliers on attractiveness: the effect remains significant.
# get p-value
# Robustness check: refit the H4 attractiveness model excluding the
# influential participants and foods found in the diagnostics above.
h4_attr_no_outliers <- mixed(
rating ~
change_diet_s * food_type +
(1 + food_type | pp) +
(1 | food),
attractiveness %>%
filter(!pp %in% c("pp_59", "pp_129")) %>%
filter(!food %in% c("haddock", "spring_rolls")),
type = 3,
method = "S",
control = lmerControl(optimizer = "bobyqa")
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h4_attr_no_outliers)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ change_diet_s * food_type + (1 + food_type | pp) + (1 |
## Model: food)
## Data: %>%
## Data: attractiveness %>% filter(!pp %in% c("pp_59", "pp_129"))
## Data: filter(!food %in% c("haddock", "spring_rolls"))
## num Df den Df F Pr(>F)
## change_diet_s 1 162.000 8.141 0.004893 **
## food_type 1 59.779 42.179 1.834e-08 ***
## change_diet_s:food_type 1 162.000 21.307 7.923e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Next, I remove the outliers on simulations: the interaction effect remains robust, but the main effect does not.
# get p-value
# Robustness check: refit the H4 simulations model excluding the
# influential participant and foods found in the diagnostics above.
h4_sim_no_outliers <- mixed(
rating ~
change_diet_s * food_type +
(1 + food_type | pp) +
(1 | food),
simulations %>%
filter(pp != "pp_103") %>%
filter(!food %in% c("vegan_fillets", "haddock", "salsa_pizza")),
type = 3,
method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h4_sim_no_outliers)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ change_diet_s * food_type + (1 + food_type | pp) + (1 |
## Model: food)
## Data: %>%
## Data: simulations %>% filter(pp != "pp_103")
## Data: filter(!food %in% c("vegan_fillets", "haddock", "salsa_pizza"))
## num Df den Df F Pr(>F)
## change_diet_s 1 163.023 3.3918 0.067338 .
## food_type 1 66.196 49.1523 1.529e-09 ***
## change_diet_s:food_type 1 163.022 7.7708 0.005941 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Out of interest, I’d like to see the interaction in H4 with meat eating frequency.
# standardize the predictor
# Add a z-scored version of weekly meat-eating frequency.
attractiveness <-
attractiveness %>%
mutate(
meat_per_week_s = scale(meat_per_week, scale = TRUE)
)
# plot
# Raw relation between meat frequency and attractiveness, per food type.
ggplot(attractiveness,
aes(
x = meat_per_week_s,
y = rating
)
)+
geom_smooth(method = "lm", aes(color = food_type)) +
ylim(0, 100)
# set contrasts
options(contrasts = c("contr.sum", "contr.poly"))
# fit model
# Exploratory model: meat-eating frequency x food type on attractiveness,
# same random structure as the H4 models.
exp_attr_meat <- lme4::lmer(
rating ~
meat_per_week_s * food_type +
(1 + food_type | pp) +
(1 | food),
attractiveness)
summary(exp_attr_meat)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ meat_per_week_s * food_type + (1 + food_type | pp) +
## (1 | food)
## Data: attractiveness
##
## REML criterion at convergence: 62015.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2016 -0.6797 0.1084 0.6989 3.2461
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 129.85 11.395
## food_type1 41.24 6.422 -0.07
## food (Intercept) 40.51 6.365
## Residual 601.45 24.525
## Number of obs: 6640, groups: pp, 166; food, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 54.7616 1.3732 39.880
## meat_per_week_s -0.8373 0.9343 -0.896
## food_type1 6.5694 1.1626 5.650
## meat_per_week_s:food_type1 3.0546 0.5823 5.246
##
## Correlation of Fixed Effects:
## (Intr) mt_p__ fd_ty1
## met_pr_wk_s 0.000
## food_type1 -0.018 0.000
## mt_pr_w_:_1 0.000 -0.054 0.000
# get p-value
# Refit via afex::mixed for Satterthwaite-based Type 3 tests.
exp_attr_meat_p <- mixed(
rating ~
meat_per_week_s * food_type +
(1 + food_type | pp) +
(1 | food),
attractiveness,
type = 3,
method = "S")
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(exp_attr_meat_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ meat_per_week_s * food_type + (1 + food_type | pp) +
## Model: (1 | food)
## Data: attractiveness
## num Df den Df F Pr(>F)
## meat_per_week_s 1 164.006 0.803 0.3715
## food_type 1 55.804 31.927 5.643e-07 ***
## meat_per_week_s:food_type 1 163.992 27.519 4.749e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# simple slopes
# Slope of meat frequency within each food type, plus the pairwise contrast.
emtrends(
exp_attr_meat_p,
pairwise ~ food_type,
var = "meat_per_week_s"
)
## $emtrends
## food_type meat_per_week_s.trend SE df asymp.LCL asymp.UCL
## meat_based 2.22 1.07 Inf 0.112 4.32
## plant_based -3.89 1.13 Inf -6.101 -1.68
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## meat_based - plant_based 6.11 1.16 Inf 5.246 <.0001
##
## Degrees-of-freedom method: asymptotic
# get approximate effect size
# Marginal and conditional R2 for the exploratory attractiveness model.
r.squaredGLMM(exp_attr_meat)
## R2m R2c
## [1,] 0.06140852 0.3056774
The same for simulations.
# standardize the predictor
# Add the z-scored meat-frequency variable to the simulations data set.
simulations <-
simulations %>%
mutate(
meat_per_week_s = scale(meat_per_week, scale = TRUE)
)
# plot
# Raw relation between meat frequency and simulation ratings, per food type.
ggplot(simulations,
aes(
x = meat_per_week_s,
y = rating
)
)+
geom_smooth(method = "lm", aes(color = food_type)) +
ylim(0, 100)
# set contrasts
options(contrasts = c("contr.sum", "contr.poly"))
# fit model
# Exploratory model: meat-eating frequency x food type on simulations.
exp_sim_meat <- lme4::lmer(
rating ~
meat_per_week_s * food_type +
(1 + food_type | pp) +
(1 | food),
simulations)
summary(exp_sim_meat)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ meat_per_week_s * food_type + (1 + food_type | pp) +
## (1 | food)
## Data: simulations
##
## REML criterion at convergence: 61210.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3937 -0.6574 0.1366 0.6861 3.1171
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 143.03 11.959
## food_type1 31.11 5.577 -0.09
## food (Intercept) 25.64 5.064
## Residual 534.14 23.111
## Number of obs: 6638, groups: pp, 166; food, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 58.9889 1.2582 46.883
## meat_per_week_s -1.4737 0.9707 -1.518
## food_type1 5.9120 0.9533 6.201
## meat_per_week_s:food_type1 2.1085 0.5176 4.073
##
## Correlation of Fixed Effects:
## (Intr) mt_p__ fd_ty1
## met_pr_wk_s 0.000
## food_type1 -0.031 0.000
## mt_pr_w_:_1 0.000 -0.075 0.000
# get p-value
# Refit via afex::mixed for Satterthwaite-based Type 3 tests.
exp_sim_meat_p <- mixed(
rating ~
meat_per_week_s * food_type +
(1 + food_type | pp) +
(1 | food),
simulations,
type = 3,
method = "S")
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(exp_sim_meat_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ meat_per_week_s * food_type + (1 + food_type | pp) +
## Model: (1 | food)
## Data: simulations
## num Df den Df F Pr(>F)
## meat_per_week_s 1 164.018 2.3049 0.1309
## food_type 1 58.431 38.4574 6.173e-08 ***
## meat_per_week_s:food_type 1 164.044 16.5918 7.200e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# simple slopes
# Slope of meat frequency within each food type for the simulations model.
emtrends(
exp_sim_meat_p,
pairwise ~ food_type,
var = "meat_per_week_s"
)
## $emtrends
## food_type meat_per_week_s.trend SE df asymp.LCL asymp.UCL
## meat_based 0.635 1.07 Inf -1.45 2.72
## plant_based -3.582 1.13 Inf -5.80 -1.36
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## meat_based - plant_based 4.22 1.04 Inf 4.073 <.0001
##
## Degrees-of-freedom method: asymptotic
# get approximate effect size
# Marginal and conditional R2 for the exploratory simulations model.
r.squaredGLMM(exp_sim_meat)
## R2m R2c
## [1,] 0.05360835 0.3112228
Another interest: Do labels help frequent meat eaters find plant-based foods more attractive? Specifically, do enhanced labels reduce the association between frequency of eating meat and simulations/attractiveness (for plant-based foods only)?
I check that with attractiveness first. Indeed, the more meat people eat, the less they find plant-based foods attractive, but less so for enhanced food labels.
# Plot meat frequency vs. attractiveness for plant-based foods only,
# one regression line per label type; y-axis fixed to the 0-100 scale.
ggplot(
filter(attractiveness, food_type == "plant_based"),
aes(x = meat_per_week_s, y = rating)
) +
geom_smooth(mapping = aes(color = label_type), method = "lm") +
ylim(0, 100)
# set contrasts
options(contrasts = c("contr.sum", "contr.poly"))
# fit model
# Exploratory model on plant-based foods only: does label type moderate the
# meat-frequency effect on attractiveness?
exp_attr_meat_labels <- lme4::lmer(
rating ~
meat_per_week_s * label_type +
(1 + label_type | pp) +
(1 + label_type | food),
attractiveness %>%
filter(food_type == "plant_based"))
summary(exp_attr_meat_labels)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ meat_per_week_s * label_type + (1 + label_type | pp) +
## (1 + label_type | food)
## Data: attractiveness %>% filter(food_type == "plant_based")
##
## REML criterion at convergence: 31037.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9383 -0.7090 0.0365 0.7153 3.2246
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 181.148 13.459
## label_type1 2.544 1.595 -0.40
## food (Intercept) 50.322 7.094
## label_type1 10.627 3.260 0.33
## Residual 595.413 24.401
## Number of obs: 3320, groups: pp, 166; food, 20
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 48.1951 1.9460 24.767
## meat_per_week_s -3.8988 1.1274 -3.458
## label_type1 -1.7861 0.8521 -2.096
## meat_per_week_s:label_type1 -1.2626 0.4414 -2.860
##
## Correlation of Fixed Effects:
## (Intr) mt_p__ lbl_t1
## met_pr_wk_s 0.000
## label_type1 0.199 0.000
## mt_pr_w_:_1 0.000 -0.103 0.000
# get p-value
# Refit via afex::mixed for Satterthwaite-based Type 3 tests.
exp_attr_meat_labels_p <- mixed(
rating ~
meat_per_week_s * label_type +
(1 + label_type | pp) +
(1 + label_type | food),
attractiveness %>%
filter(food_type == "plant_based"),
type = 3,
method = "S")
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(exp_attr_meat_labels_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ meat_per_week_s * label_type + (1 + label_type | pp) +
## Model: (1 + label_type | food)
## Data: %>%
## Data: attractiveness
## Data: filter(food_type == "plant_based")
## num Df den Df F Pr(>F)
## meat_per_week_s 1 163.881 11.9587 0.0006931 ***
## label_type 1 19.155 4.3939 0.0495763 *
## meat_per_week_s:label_type 1 163.119 8.1817 0.0047856 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# simple slopes
# Slope of meat frequency within each label type, plus the pairwise contrast.
emtrends(
exp_attr_meat_labels_p,
pairwise ~ label_type,
var = "meat_per_week_s"
)
## $emtrends
## label_type meat_per_week_s.trend SE df asymp.LCL asymp.UCL
## control -5.16 1.17 Inf -7.45 -2.873
## enhanced -2.64 1.25 Inf -5.09 -0.182
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## control - enhanced -2.53 0.883 Inf -2.860 0.0042
##
## Degrees-of-freedom method: asymptotic
# get approximate effect size
r.squaredGLMM(exp_attr_meat_labels)
## R2m R2c
## [1,] 0.02324102 0.3076937
The pattern looks similar for simulations, but the interaction is not significant.
# plot: linear trend of rating on centered meat consumption,
# one smoothed line per label type (plant-based foods only)
simulations %>%
filter(food_type == "plant_based") %>%
ggplot(aes(x = meat_per_week_s, y = rating)) +
geom_smooth(aes(color = label_type), method = "lm") +
ylim(0, 100)
# set sum-to-zero contrasts so the Type 3 tests below are interpretable
# NOTE(review): this changes a global option for the rest of the session
options(contrasts = c("contr.sum", "contr.poly"))
# fit model: same label-type model as above, now on the simulation ratings;
# bobyqa optimizer — presumably needed for convergence here, TODO confirm
exp_sim_meat_labels <- lme4::lmer(
rating ~
meat_per_week_s * label_type +
(1 + label_type | pp) +
(1 + label_type | food),
simulations %>%
filter(food_type == "plant_based"),
control = lmerControl(optimizer = "bobyqa"))
summary(exp_sim_meat_labels)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ meat_per_week_s * label_type + (1 + label_type | pp) +
## (1 + label_type | food)
## Data: simulations %>% filter(food_type == "plant_based")
## Control: lmerControl(optimizer = "bobyqa")
##
## REML criterion at convergence: 30693
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.12410 -0.69306 0.07054 0.67510 3.01615
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 187.726 13.701
## label_type1 25.675 5.067 0.11
## food (Intercept) 33.916 5.824
## label_type1 7.776 2.789 -0.03
## Residual 517.660 22.752
## Number of obs: 3320, groups: pp, 166; food, 20
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 53.0781 1.7270 30.734
## meat_per_week_s -3.5850 1.1346 -3.160
## label_type1 -3.9745 0.8363 -4.752
## meat_per_week_s:label_type1 -0.9924 0.5575 -1.780
##
## Correlation of Fixed Effects:
## (Intr) mt_p__ lbl_t1
## met_pr_wk_s 0.000
## label_type1 0.015 0.000
## mt_pr_w_:_1 0.000 0.075 0.000
# get p-values via afex::mixed (Type 3 tests, method = "S" = Satterthwaite
# degrees-of-freedom approximation); bobyqa optimizer as in the lmer fit above
exp_sim_meat_labels_p <- mixed(
rating ~
meat_per_week_s * label_type +
(1 + label_type | pp) +
(1 + label_type | food),
simulations %>%
filter(food_type == "plant_based"),
type = 3,
method = "S",
control = lmerControl(optimizer = "bobyqa"))
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(exp_sim_meat_labels_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ meat_per_week_s * label_type + (1 + label_type | pp) +
## Model: (1 + label_type | food)
## Data: %>%
## Data: simulations
## Data: filter(food_type == "plant_based")
## num Df den Df F Pr(>F)
## meat_per_week_s 1 163.943 9.9842 0.001881 **
## label_type 1 29.395 22.5848 4.899e-05 ***
## meat_per_week_s:label_type 1 163.236 3.1679 0.076960 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# simple slopes: slope of meat_per_week_s within each label_type level,
# plus the pairwise contrast between the two slopes
emtrends(
exp_sim_meat_labels_p,
pairwise ~ label_type,
var = "meat_per_week_s"
)
## $emtrends
## label_type meat_per_week_s.trend SE df asymp.LCL asymp.UCL
## control -4.58 1.30 Inf -7.13 -2.027
## enhanced -2.59 1.23 Inf -5.00 -0.189
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## control - enhanced -1.98 1.12 Inf -1.780 0.0751
##
## Degrees-of-freedom method: asymptotic
# approximate effect size: marginal (R2m, fixed effects only) and
# conditional (R2c, fixed + random effects) pseudo-R^2
r.squaredGLMM(exp_sim_meat_labels)
## R2m R2c
## [1,] 0.03693944 0.3548545
I’d also be interested in whether a feeling of hunger interacts with food type when predicting attractiveness. It does: for meat-based foods, an increase in hunger is associated with higher attractiveness, whereas the opposite is the case for plant-based foods.
# center hunger (scale = FALSE means mean-centering only, no SD scaling —
# so "hungry_s" holds centered, not standardized, values);
# as.numeric() drops the 1-column matrix that scale() returns, which would
# otherwise be stored as a matrix column in the data frame and can trip up
# later write_csv()/join operations — numeric values are unchanged
attractiveness <-
attractiveness %>%
mutate(hungry_s = as.numeric(scale(hungry, center = TRUE, scale = FALSE)))
# plot: linear trend of rating on centered hunger,
# one smoothed line per food type
attractiveness %>%
ggplot(aes(x = hungry_s, y = rating)) +
geom_smooth(aes(color = food_type), method = "lm") +
ylim(0, 100)
# set sum-to-zero contrasts so the Type 3 tests below are interpretable
# NOTE(review): this changes a global option for the rest of the session
options(contrasts = c("contr.sum", "contr.poly"))
# fit model: hunger x food type, with a random food-type slope per
# participant and a random intercept per food item
exp_attr_hunger_food <- lme4::lmer(
rating ~
hungry_s * food_type +
(1 + food_type | pp) +
(1 | food),
attractiveness
)
summary(exp_attr_hunger_food)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ hungry_s * food_type + (1 + food_type | pp) + (1 | food)
## Data: attractiveness
##
## REML criterion at convergence: 62047.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2863 -0.6830 0.1065 0.6964 3.2285
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pp (Intercept) 130.56 11.426
## food_type1 47.83 6.916 -0.09
## food (Intercept) 40.50 6.364
## Residual 601.46 24.525
## Number of obs: 6640, groups: pp, 166; food, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 54.761595 1.374591 39.838
## hungry_s 0.001482 0.037348 0.040
## food_type1 6.569393 1.179460 5.570
## hungry_s:food_type1 0.066941 0.024541 2.728
##
## Correlation of Fixed Effects:
## (Intr) hngry_ fd_ty1
## hungry_s 0.000
## food_type1 -0.028 0.000
## hngry_s:f_1 0.000 -0.078 0.000
# get p-values via afex::mixed (Type 3 tests, method = "S" = Satterthwaite
# degrees-of-freedom approximation); same model as exp_attr_hunger_food above
exp_attr_hunger_food_p <- mixed(
rating ~
hungry_s * food_type +
(1 + food_type | pp) +
(1 | food),
attractiveness,
type = 3,
method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(exp_attr_hunger_food_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
##
## Model: rating ~ hungry_s * food_type + (1 + food_type | pp) + (1 | food)
## Data: attractiveness
## num Df den Df F Pr(>F)
## hungry_s 1 164.008 0.0016 0.968393
## food_type 1 58.846 31.0230 6.651e-07 ***
## hungry_s:food_type 1 164.004 7.4406 0.007072 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# simple slopes: slope of hungry_s within each food_type level,
# plus the pairwise contrast between the two slopes
emtrends(
exp_attr_hunger_food_p,
pairwise ~ food_type,
var = "hungry_s"
)
## $emtrends
## food_type hungry_s.trend SE df asymp.LCL asymp.UCL
## meat_based 0.0684 0.0431 Inf -0.016 0.1528
## plant_based -0.0655 0.0463 Inf -0.156 0.0252
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## meat_based - plant_based 0.134 0.0491 Inf 2.728 0.0064
##
## Degrees-of-freedom method: asymptotic
# approximate effect size: marginal (R2m, fixed effects only) and
# conditional (R2c, fixed + random effects) pseudo-R^2
r.squaredGLMM(exp_attr_hunger_food)
## R2m R2c
## [1,] 0.05307852 0.3057325
Last, I write the final analysis files. I also save the seven fitted models from which I will extract parameters for plotting (see plots.Rmd).
# save the fitted models for parameter extraction in plots.Rmd
# (the h4_* and exp_*_meat objects are created earlier in this script,
# outside the section shown here)
save(
h4_attr,
h4_sim,
exp_attr_meat,
exp_sim_meat,
exp_attr_meat_labels,
exp_sim_meat_labels,
exp_attr_hunger_food,
file = here("plots", "models", "models.RData")
)
# write the final analysis data sets to data/study2 for downstream use
study2_dir <- here("data", "study2")
# full analysis file
write_csv(working_file, file.path(study2_dir, "analysis_file.csv"))
# attractiveness ratings only
write_csv(attractiveness, file.path(study2_dir, "attractiveness.csv"))
# simulation ratings only
write_csv(simulations, file.path(study2_dir, "simulations.csv"))